Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
05f3978
use exactextract
KriWay-LV Apr 8, 2025
fd1592f
Test
KriWay-LV Apr 8, 2025
5b0d6ed
use exactextract for zonal stats
KriWay-LV Apr 10, 2025
89e7e66
buffer = 0
KriWay-LV Apr 10, 2025
ee6156a
update changelog
KriWay-LV Apr 10, 2025
4634573
removed stats as parameter
KriWay-LV Apr 10, 2025
703cbe3
changed example
KriWay-LV May 7, 2025
8062e4a
add force parameter to zonal_stats_band_tofile
KriWay-LV May 7, 2025
6f9ce87
add force parameter to test
KriWay-LV May 7, 2025
8e71727
refactor
KriWay-LV May 7, 2025
2f17bd2
refactor
KriWay-LV May 7, 2025
e38cfde
refactor + add new test
KriWay-LV May 8, 2025
f43273b
refactor
KriWay-LV May 9, 2025
a73ec89
refactor
KriWay-LV May 9, 2025
fccb369
refactor
KriWay-LV May 10, 2025
cd6b9a6
Merge branch 'main' into feature/exactextract-vs-pyqgis
KriWay-LV May 10, 2025
bd292e1
refactor stats as list of strings
KriWay-LV May 12, 2025
a1c9ec7
new version of prc_befl_2023_2023_07_24.gpkg
KriWay-LV May 12, 2025
bbafed8
add zonal_stats_bulk_invalid test
KriWay-LV May 12, 2025
ca64d33
min_coverage_frac = 0.8
KriWay-LV May 12, 2025
260216c
refactor zonal_stats_bulk_invalid test
KriWay-LV May 12, 2025
cfa6049
addressed review comments
KriWay-LV May 12, 2025
af936dc
added test exactextract
KriWay-LV May 23, 2025
aacd902
Merge branch 'main' into pr/181
theroggy Aug 14, 2025
7def42d
Avoid very high commited memory being reserved
theroggy Aug 24, 2025
1cafab4
Small improvement to logging
theroggy Aug 24, 2025
bed1b87
Adapt min_parcels_with_data_prc to avoid s1 being lost due to buffer=0
theroggy Aug 24, 2025
61cccb5
Update calc_cropclass.py
theroggy Aug 24, 2025
306f172
Disable botocore.credentions info logging
theroggy Aug 24, 2025
268ceff
Small improvements to tests
theroggy Aug 24, 2025
fda5432
Simplify test
theroggy Aug 24, 2025
abfc247
Merge branch 'main' into pr/181
theroggy Aug 26, 2025
aa4c702
updated version
KriWay-LV Apr 27, 2026
ca2db2e
Merge branch 'main' into feature/exactextract-vs-pyqgis
KriWay-LV Apr 27, 2026
f8d67a1
expect float16 in stead of float32
KriWay-LV Apr 30, 2026
0811a8a
undo the changes
KriWay-LV Apr 30, 2026
6e8b0b9
redo changes
KriWay-LV May 4, 2026
b4b70b9
pinned version rasterio and tests with minimal python 3.12
KriWay-LV May 4, 2026
9464546
use python 3.13 for macos-latest test
KriWay-LV May 5, 2026
16c7b74
skip tests when certain rasterio version
KriWay-LV May 5, 2026
77090f6
rasterio not pinned
KriWay-LV May 5, 2026
0b61113
set minimal python version to 3.10
KriWay-LV May 5, 2026
e750708
refactor yml
KriWay-LV May 5, 2026
6f2bdc6
refactor yml
KriWay-LV May 5, 2026
cc950a1
Merge branch 'feature/fix-tests' into feature/exactextract-vs-pyqgis
KriWay-LV May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
matrix:
os: [ubuntu-latest]
dev: [false]
python: ["3.10", "3.11", "3.12"]
python: ["3.10", "3.11", "3.12", "3.13"]
env: ["latest"]
# Using openblas instead of mkl saves 600 MB. OK on Linux, but 50% slower on Windows and OSX!
extra: ["nomkl"]
Expand All @@ -44,7 +44,7 @@ jobs:
- env: latest
os: windows-latest
dev: false
python: "3.12"
python: "3.13"

steps:
- uses: actions/checkout@v6
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
- Add first version of a cover/bare soil marker (#168, #200)
- Avoid high committed memory for zonal stats calculation (#197)
- General small improvements, e.g. save randomforest models compressed,.. (#144)
- Use exactextract as the default engine for zonal stats calculation (#181)

### Bugs fixed

Expand Down
6 changes: 6 additions & 0 deletions cropclassification/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
"""Package with functionalities to support agricultural parcel monitoring."""

import logging

# Raise the "botocore.credentials" logger to WARNING so that its INFO (and DEBUG)
# messages are no longer emitted.
logger_botocore_credentials = logging.getLogger("botocore.credentials")
logger_botocore_credentials.setLevel(logging.WARNING)
1 change: 1 addition & 0 deletions cropclassification/calc_cropclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def run_cropclass(
parceldata_aggregations_to_use = conf.marker.getlist(
"parceldata_aggregations_to_use"
)

ts.calc_timeseries_data(
input_parcel_path=imagedata_input_parcel_path,
roi_bounds=tuple(conf.roi.getlistfloat("roi_bounds")),
Expand Down
20 changes: 18 additions & 2 deletions cropclassification/general.ini
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,30 @@ on_missing_image = calculate_raise
# Configuration on how/which periodic images/timeseries statistics should be generated.
[timeseries]

# Engine to use for the timeseries calculation.
# Possible values are pyqgis, rasterstats and exactextract.
engine = exactextract

# Stats to calculate for the timeseries. The following stats are available:
Comment thread
KriWay-LV marked this conversation as resolved.
# - "rasterstats": documentation: https://pythonhosted.org/rasterstats/manual.html#statistics
# - "pyqgis": "count", "sum", "mean", "median", "std", "min", "max", "range", "minority", "majority" and "variance".
# - "exactextract": documentation: https://isciences.github.io/exactextract/operations.html
stats = [
"count(min_coverage_frac=1,coverage_weight=none)",
"mean(min_coverage_frac=1,coverage_weight=none)",
"median(min_coverage_frac=1,coverage_weight=none)",
"stdev(min_coverage_frac=1,coverage_weight=none)",
"min(min_coverage_frac=1,coverage_weight=none)",
"max(min_coverage_frac=1,coverage_weight=none)"
]
# Negative buffer to apply to input parcels to account for mixels.
buffer = 5
buffer = 0

# The maximum percentage cloudcover an (S2) image can have to be used.
max_cloudcover_pct = 15

# The min percentage of parcels that need to have valid data for a time+sensor to use it
min_parcels_with_data_pct = 80
min_parcels_with_data_pct = 75

# Configuration specific to the marker being calculated.
[marker]
Expand Down
1 change: 1 addition & 0 deletions cropclassification/helpers/config_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def read_config(
"list": lambda x: [i.strip() for i in x.split(",")],
"listint": lambda x: [int(i.strip()) for i in x.split(",")],
"listfloat": lambda x: [float(i.strip()) for i in x.split(",")],
"jsonlist": lambda x: None if x is None else json.loads(x),
"dict": lambda x: None if x is None else json.loads(x),
"path": lambda x: None if x is None else Path(x),
},
Expand Down
17 changes: 14 additions & 3 deletions cropclassification/preprocess/_timeseries_calc_openeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def calculate_periodic_timeseries(
imageprofiles: dict[str, ImageProfile],
images_periodic_dir: Path,
timeseries_periodic_dir: Path,
engine: str,
stats: list[str],
nb_parallel: int,
on_missing_image: str,
force: bool = False,
Expand All @@ -44,6 +46,16 @@ def calculate_periodic_timeseries(
images_periodic_dir (Path): directory where the images are stored.
timeseries_periodic_dir (Path): directory where the timeseries data will be
saved.
engine (str): the engine to use for the calculation. Options are
"exactextract", "rasterstats" and "pyqgis".
stats (list[str]): statistics to calculate. Available statistics and
special options are dependent on the `engine` specified:
Comment thread
KriWay-LV marked this conversation as resolved.

- "rasterstats": `rasterstats documentation <https://pythonhosted.org/rasterstats/manual.html#statistics>`_
- "pyqgis": "count", "sum", "mean", "median", "std", "min", "max",
"range", "minority", "majority" and "variance".
- "exactextract": `exactextract documentation <https://isciences.github.io/exactextract/operations.html>`_

nb_parallel (int): number of parallel processes to use.
on_missing_image (str): what to do when an image is missing. Options are:

Expand Down Expand Up @@ -88,14 +100,13 @@ def calculate_periodic_timeseries(
if temp_dir == "None":
temp_dir = Path(tempfile.gettempdir())

logger.info(f"Calculating timeseries for {len(images_bands)} images")
zonal_stats_bulk.zonal_stats(
vector_path=input_parcel_path,
id_column=conf.columns["id"],
rasters_bands=images_bands,
output_dir=timeseries_periodic_dir,
stats=["count", "mean", "median", "std", "min", "max"],
engine="pyqgis",
stats=stats,
engine=engine,
nb_parallel=nb_parallel,
force=force,
)
2 changes: 2 additions & 0 deletions cropclassification/preprocess/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def calc_timeseries_data(
imageprofiles=conf.image_profiles,
images_periodic_dir=conf.paths.getpath("images_periodic_dir"),
timeseries_periodic_dir=timeseries_periodic_dir,
engine=conf.timeseries.get("engine"),
stats=conf.timeseries.getjsonlist("stats"),
nb_parallel=conf.general.getint("nb_parallel", -1),
on_missing_image=conf.images.get("on_missing_image", "calculate_raise"),
force=force,
Expand Down
11 changes: 6 additions & 5 deletions cropclassification/util/zonal_stats_bulk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Calculate zonal statistics for a vector file with many raster files."""

import logging
from pathlib import Path

from . import (
Expand All @@ -9,16 +10,18 @@
)
from ._raster_helper import * # noqa: F403

logger = logging.getLogger(__name__)


def zonal_stats(
vector_path: Path,
id_column: str,
rasters_bands: list[tuple[Path, list[str]]],
output_dir: Path,
engine: str,
stats: list[str] | str | None = None,
cloud_filter_band: str | None = None,
calc_bands_parallel: bool = True,
engine: str = "rasterstats",
nb_parallel: int = -1,
force: bool = False,
) -> None:
Expand All @@ -31,21 +34,19 @@ def zonal_stats(
rasters_bands (List[Tuple[Path, List[str]]]): List of tuples with the path to
the raster files and the bands to calculate the zonal statistics on.
output_dir (Path): directory to write the results to.
engine (str): the engine to use for the calculation. Options are
"exactextract", "rasterstats" and "pyqgis".
stats (List[str]): statistics to calculate. Defaults to ["count", "median"].
Available statistics and special options are dependent on the `engine`
specified:

- "rasterstats": `rasterstats documentation <https://pythonhosted.org/rasterstats/manual.html#statistics>`_
- "pyqgis": "count", "sum", "mean", "median", "std", "min", "max",
"range", "minority", "majority" and "variance".
- "exactextract": `exactextract documentation <https://isciences.github.io/exactextract/operations.html>`_

cloud_filter_band (str, optional): the band to use as a cloud filter. Only
supported for engine "rasterstats". Defaults to None.
calc_bands_parallel (bool, optional): True to calculate the bands in parallel.
Only supported for engine "rasterstats". Defaults to True.
engine (str, optional): the engine to use for the calculation. Options are
"exactextract", "rasterstats" and "pyqgis". Defaults to "rasterstats".
nb_parallel (int, optional): the number of parallel processes to use.
Defaults to -1: use all available processors.
force (bool, optional): False to skip calculating existing output files. True to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def zonal_stats(
tmp_dir=tmp_dir,
include_cols=columns,
output_paths=output_paths,
force=force,
)
calc_queue[future] = {
"vector_path": vector_path,
Expand Down Expand Up @@ -236,9 +237,10 @@ def zonal_stats_band(
output="pandas",
include_cols=include_cols,
)

except Exception:
raise
except Exception as ex:
message = f"Error calculating zonal stats {stats}: {ex}"
logger.error(message)
raise ValueError(message) from ex

return stats_df

Expand All @@ -253,13 +255,6 @@ def zonal_stats_band_tofile(
include_cols: list[str],
force: bool = False,
) -> dict[str, Path]:
# Init
if all(output_path.exists() for output_path in output_paths.values()):
if force:
for output_path in output_paths.values():
output_path.unlink(missing_ok=True)
return output_paths

stats_df = zonal_stats_band(
vector_path=vector_path,
raster_path=raster_path,
Expand All @@ -273,17 +268,26 @@ def zonal_stats_band_tofile(
for band in bands:
index = raster_info.bands[band].band_index
band_columns = include_cols.copy()
band_columns.extend(
[f"band_{index}_{stat}" for stat in [stat.split("(")[0] for stat in stats]]
)
band_stats_df = stats_df[band_columns].copy()
band_stats_df.rename(
columns={
f"band_{index}_{stat}": stat
for stat in [stat.split("(")[0] for stat in stats]
},
inplace=True,
)
if len(bands) == 1:
band_columns.extend(
[f"{stat}" for stat in [stat.split("(")[0] for stat in stats]]
)
band_stats_df = stats_df[band_columns].copy()
else:
band_columns.extend(
[
f"band_{index}_{stat}"
for stat in [stat.split("(")[0] for stat in stats]
]
)
band_stats_df = stats_df[band_columns].copy()
band_stats_df.rename(
columns={
f"band_{index}_{stat}": stat
for stat in [stat.split("(")[0] for stat in stats]
},
inplace=True,
)
# Add fid column to the beginning of the dataframe
band_stats_df.insert(0, "fid", range(len(band_stats_df)))

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"imageprofile": "s1-grd-sigma0-vvdvh-asc-weekly",
"collection": null,
"index_type": "vvdvh",
"max_cloud_cover": null,
"image_source": "local",
"base_imageprofile": "s1-grd-sigma0-asc-weekly",
"pixel_type": "FLOAT32",
"satellite": "s1",
"roi_bounds": [
161400.0,
188000.0,
161900.0,
188500.0
],
"roi_crs": "31370",
"start_date": "2024-03-04",
"end_date_incl": "2024-03-10",
"end_date": "2024-03-11",
"period_name": "weekly",
"weeks": [
10
],
"bands": [
"vvdvh"
],
"time_reducer": "last",
"path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif",
"job_options": null,
"process_options": null
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"imageprofile": "s1-grd-sigma0-vvdvh-asc-weekly",
"collection": null,
"index_type": "vvdvh",
"max_cloud_cover": null,
"image_source": "local",
"base_imageprofile": "s1-grd-sigma0-asc-weekly",
"pixel_type": "FLOAT32",
"satellite": "s1",
"roi_bounds": [
161400.0,
188000.0,
161900.0,
188500.0
],
"roi_crs": "31370",
"start_date": "2024-03-11",
"end_date_incl": "2024-03-17",
"end_date": "2024-03-18",
"period_name": "weekly",
"weeks": [
11
],
"bands": [
"vvdvh"
],
"time_reducer": "last",
"path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif",
"job_options": null,
"process_options": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"imageprofile": "s1-grd-sigma0-vvdvh-desc-weekly",
"collection": null,
"index_type": "vvvh",
"max_cloud_cover": null,
"image_source": "local",
"base_imageprofile": "s1-grd-sigma0-desc-weekly",
"pixel_type": "FLOAT32",
"satellite": "s1",
"roi_bounds": [
161400.0,
188000.0,
161900.0,
188500.0
],
"roi_crs": "31370",
"start_date": "2024-03-04",
"end_date_incl": "2024-03-10",
"end_date": "2024-03-11",
"period_name": "weekly",
"weeks": [
10
],
"bands": [
"vvdvh"
],
"time_reducer": "last",
"path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif",
"job_options": null,
"process_options": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"imageprofile": "s1-grd-sigma0-vvdvh-desc-weekly",
"collection": null,
"index_type": "vvvh",
"max_cloud_cover": null,
"image_source": "local",
"base_imageprofile": "s1-grd-sigma0-desc-weekly",
"pixel_type": "FLOAT32",
"satellite": "s1",
"roi_bounds": [
161400.0,
188000.0,
161900.0,
188500.0
],
"roi_crs": "31370",
"start_date": "2024-03-11",
"end_date_incl": "2024-03-17",
"end_date": "2024-03-18",
"period_name": "weekly",
"weeks": [
11
],
"bands": [
"vvdvh"
],
"time_reducer": "last",
"path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif",
"job_options": null,
"process_options": null
}
Loading
Loading