diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..99ec52c --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,28 @@ +# LIFE Pipeline Configuration +# =========================== + +# Taxonomic classes to process +taxa: + - AMPHIBIA + - AVES + - MAMMALIA + - REPTILIA + +# Pixel scale +pixel_scale: 0.016666666666667 + +# Hyde projection data +hyde_projection: 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' +hyde_pixel_scale: 0.08333333333333333 + +# Zenodo configuration for downloading raw habitat +zenodo: + jung_habitat: + zenodo_id: 4058819 + filename: "iucn_habitatclassification_composite_lvl2_ver004.zip" + jung_habitat_updates: + zenodo_id: 4058819 + filename: "lvl2_changemasks_ver004.zip" + jung_pnv: + zenodo_id: 4038749 + filename: "pnv_lvl1_004.zip" diff --git a/deltap/delta_p_scaled.py b/deltap/delta_p_scaled.py index aed4923..bf981ba 100644 --- a/deltap/delta_p_scaled.py +++ b/deltap/delta_p_scaled.py @@ -4,66 +4,57 @@ from pathlib import Path import pandas as pd -import yirgacheffe.operators as yo -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg SCALE = 1e6 def delta_p_scaled_area( input_path: Path, diff_area_map_path: Path, - totals_path: Path, + species_totals_path: Path, output_path: Path, ): os.makedirs(output_path.parent, exist_ok=True) per_taxa = [ - RasterLayer.layer_from_file(os.path.join(input_path, x)) - for x in sorted(input_path.glob("*.tif")) + yg.read_raster(x) for x in sorted(input_path.glob("*.tif")) ] if not per_taxa: sys.exit(f"Failed to find any per-taxa maps in {input_path}") - area_restore = RasterLayer.layer_from_file(diff_area_map_path) + species_total_counts = pd.read_csv(species_totals_path) - total_counts = 
pd.read_csv(totals_path) + diff_area = yg.read_raster(diff_area_map_path) + diff_area_rescaled = yg.where(diff_area < SCALE, float('nan'), diff_area / SCALE) - area_restore_filter = yo.where(area_restore < SCALE, float('nan'), area_restore) / SCALE - - with RasterLayer.empty_raster_layer_like( - area_restore, - filename=output_path, - nodata=float('nan'), - bands=len(per_taxa) + 1 - ) as result: + # Process all species in total + total_species_count = int(species_total_counts[species_total_counts.taxa=="all"]["count"].values[0]) + summed_layer = yg.sum(per_taxa) + all_scaled_filtered_layer = yg.where( + diff_area_rescaled != 0, + ((summed_layer / diff_area_rescaled) * -1.0) / total_species_count, + float('nan') + ) - species_count = int(total_counts[total_counts.taxa=="all"]["count"].values[0]) + bands = [all_scaled_filtered_layer] + labels = ["all"] - result._dataset.GetRasterBand(1).SetDescription("all") # pylint: disable=W0212 - summed_layer = per_taxa[0] - for layer in per_taxa[1:]: - summed_layer = summed_layer + layer + # Now per taxa + for inlayer in per_taxa: + # get the taxa from the filename + _, name = os.path.split(inlayer.name) + taxa = name[:-4] + labels.append(taxa) - scaled_filtered_layer = yo.where( - area_restore_filter != 0, - ((summed_layer / area_restore_filter) * -1.0) / species_count, + taxa_species_count = int(species_total_counts[species_total_counts.taxa==taxa]["count"].values[0]) + scaled_filtered_layer = yg.where( + diff_area_rescaled != 0, + ((inlayer / diff_area_rescaled) * -1.0) / taxa_species_count, float('nan') ) - scaled_filtered_layer.parallel_save(result, band=1) - - for idx, inlayer in enumerate(per_taxa): - assert inlayer.name - _, name = os.path.split(inlayer.name) - taxa = name[:-4] - species_count = int(total_counts[total_counts.taxa==taxa]["count"].values[0]) - result._dataset.GetRasterBand(idx + 2).SetDescription(taxa) # pylint: disable=W0212 - scaled_filtered_layer = yo.where( - area_restore_filter != 0, - ((inlayer / 
area_restore_filter) * -1.0) / species_count, - float('nan') - ) - scaled_filtered_layer.parallel_save(result, band=idx + 2) + bands.append(scaled_filtered_layer) + yg.to_geotiff(output_path, bands, labels, nodata=float('nan')) def main() -> None: parser = argparse.ArgumentParser(description="Scale final results for publication.") diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py index 6bb310d..5d5f5da 100644 --- a/deltap/global_code_residents_pixel.py +++ b/deltap/global_code_residents_pixel.py @@ -1,298 +1,293 @@ import argparse +import json import math import os -import shutil import sys -from enum import Enum -from tempfile import TemporaryDirectory -from typing import Union +from pathlib import Path -import geopandas as gpd -import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer, ConstantLayer +import yirgacheffe as yg + +# This isn't a hard requirement, but in practice most experiments use 0.25, and the original +# paper used the other three values for comparison. Other values are valid, but to save wasted +# times with typos, we do restrict this to the subset used for the paper. +FLOAT_EXPONENTS = {0.1, 0.25, 0.5, 1.0} GOMPERTZ_A = 2.5 GOMPERTZ_B = -14.5 GOMPERTZ_ALPHA = 1 -class Season(Enum): - RESIDENT = 1 - BREEDING = 2 - NONBREEDING = 3 +def open_layer(filename: Path) -> tuple[yg.YirgacheffeLayer,float]: + """We use this helper function for two reasons: + 1. The delta-p values are quite small, and so we want to ensure things are in float64. + 2. We almost always need the total area, but rather than calculate it we can get that + from the JSON file that sits besides the TIFF. 
+ """ + # The "nan" is an artefact of bouncing the data via pandas + if filename.name == "nan": + return yg.constant(0.0), 0.0 -def gen_gompertz(x: float) -> float: - return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + layer = yg.read_raster(filename) -def numpy_gompertz(x: float) -> float: - return np.exp(-np.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + json_filename = filename.parent / f"{filename.stem}.json" + with open(json_filename, "r", encoding="utf-8") as f: + data = json.load(f) + total_aoh = data["aoh_total"] -def open_layer_as_float64(filename: str) -> Union[ConstantLayer,RasterLayer]: - if filename == "nan": - return ConstantLayer(0.0) - layer = RasterLayer.layer_from_file(filename) - if layer.datatype == gdal.GDT_Float64: - return layer - layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64) - layer.save(layer64) - return layer64 + return layer, total_aoh -def calc_persistence_value(current_aoh: float, historic_aoh: float, exponent_func) -> float: - sp_p = exponent_func(current_aoh / historic_aoh) - sp_p_fix = 1 if sp_p > 1 else sp_p - return sp_p_fix +def calc_persistence_value( + current_aoh: float, + historic_aoh: float, + exponent: str | float +) -> float: + scaled_aoh = current_aoh / historic_aoh + if isinstance(exponent, float): + sp_p = scaled_aoh ** exponent + else: + assert exponent == "gompetz" + sp_p = math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (scaled_aoh ** GOMPERTZ_ALPHA)))) + return 1.0 if sp_p > 1.0 else sp_p def process_delta_p( - current: Union[ConstantLayer,RasterLayer], - scenario: Union[ConstantLayer,RasterLayer], + current: yg.YirgacheffeLayer, + scenario: yg.YirgacheffeLayer | float, current_aoh: float, historic_aoh: float, - exponent_func_raster -) -> RasterLayer: - # In theory we could recalc current_aoh, but given we already have it don't duplicate work - # New section added in: Calculating for rasters rather than csv's - + exponent: str | float +) -> 
yg.YirgacheffeLayer: - new_p = ((ConstantLayer(current_aoh) - current) + scenario) / historic_aoh + new_aoh = (current_aoh - current) + scenario - - const_layer = ConstantLayer(current_aoh) - calc_1 = (const_layer - current) + scenario - new_aoh = RasterLayer.empty_raster_layer_like(current) - calc_1.save(new_aoh) - - calc_2 = (new_aoh / historic_aoh).numpy_apply(exponent_func_raster) - calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) - new_p = RasterLayer.empty_raster_layer_like(new_aoh) - calc_2.save(new_p) + scaled_aoh = new_aoh / historic_aoh + if isinstance(exponent, float): + calc_2 = scaled_aoh ** exponent + else: + assert exponent == "gompetz" + calc_2 = yg.exp(-yg.exp(GOMPERTZ_A + (GOMPERTZ_B * (scaled_aoh ** GOMPERTZ_ALPHA)))) + new_p = yg.where(calc_2 > 1, 1, calc_2) return new_p def global_code_residents_pixel_ae( - species_data_path: str, - current_aohs_path: str, - scenario_aohs_path: str, - historic_aohs_path: str, - exponent: str, - output_folder: str, + taxid: int, + season: str, + current_aohs_path: Path, + scenario_aohs_path: Path, + historic_aohs_path: Path, + exponent: str | float, + output_folder: Path, ) -> None: os.makedirs(output_folder, exist_ok=True) - os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" - try: - filtered_species_info = gpd.read_file(species_data_path) - except: # pylint:disable=W0702 - sys.exit(f"Failed to read {species_data_path}") - taxid = filtered_species_info.id_no.values[0] - season = Season[filtered_species_info.season.values[0]] - - try: - exp_val = float(exponent) - z_exponent_func_float = lambda x: np.float_power(x, exp_val) - z_exponent_func_raster = lambda x: np.float_power(x, exp_val) - except ValueError: - if exponent == "gompertz": - z_exponent_func_float = gen_gompertz - z_exponent_func_raster = numpy_gompertz - else: - sys.exit(f"unrecognised exponent {exponent}") - match season: - case Season.RESIDENT: - filename = f"{taxid}_{season.name}.tif" + case "RESIDENT": + filename = 
f"{taxid}_{season}.tif" try: - current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) + current, current_aoh = open_layer(current_aohs_path / filename) except FileNotFoundError: - print(f"Failed to open current layer {os.path.join(current_aohs_path, filename)}") - sys.exit() + sys.exit(f"Failed to open current layer {current_aohs_path / filename}") try: - scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename)) + scenario: yg.YirgacheffeLayer | float + scenario, _ = open_layer(scenario_aohs_path / filename) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario = ConstantLayer(0.0) + scenario = 0.0 try: - historic_aoh = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() + _, historic_aoh = open_layer(historic_aohs_path / filename) except FileNotFoundError: - print(f"Failed to open historic layer {os.path.join(historic_aohs_path, filename)}") - sys.exit() + sys.exit(f"Failed to open historic layer {historic_aohs_path / filename}") if historic_aoh == 0.0: - print(f"Historic AoH for {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH for {taxid} is zero, aborting") - # print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_aoh.sum()}") + old_persistence = calc_persistence_value(current_aoh, historic_aoh, exponent) - layers = [current, scenario] - union = RasterLayer.find_union(layers) + + # In general Yirgacheffe can infer the behaviour needed for area intersections based on + # operator, but in this instance we want to force the caclulation to take place for the + # union of the areas involved. 
+ layers = [x for x in [current, scenario] if isinstance(x, yg.YirgacheffeLayer)] + union = yg.layers.RasterLayer.find_union(layers) for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass + layer.set_window_for_union(union) - current_aoh = current.sum() + new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, exponent) - new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, z_exponent_func_raster) - print(new_p_layer.sum()) + delta_p = new_p_layer - old_persistence - old_persistence = calc_persistence_value(current_aoh, historic_aoh, z_exponent_func_float) - print(old_persistence) - calc = new_p_layer - ConstantLayer(old_persistence) + try: + delta_p.to_geotiff(output_folder / filename) + except ValueError: + sys.exit(f"Failed to align layers for {taxid}_{season}") - with TemporaryDirectory() as tmpdir: - tmpfile = os.path.join(tmpdir, filename) - with RasterLayer.empty_raster_layer_like(new_p_layer, filename=tmpfile) as delta_p: - calc.save(delta_p) - shutil.move(tmpfile, os.path.join(output_folder, filename)) - case Season.NONBREEDING: - nonbreeding_filename = f"{taxid}_{Season.NONBREEDING.name}.tif" - breeding_filename = f"{taxid}_{Season.BREEDING.name}.tif" + case "NONBREEDING": + nonbreeding_filename = f"{taxid}_NONBREEDING.tif" + breeding_filename = f"{taxid}_BREEDING.tif" try: - with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)) as aoh: - historic_aoh_breeding = aoh.sum() + _, historic_aoh_breeding = open_layer(historic_aohs_path / breeding_filename) if historic_aoh_breeding == 0.0: - print(f"Historic AoH breeding for {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH breeding for {taxid} is zero, aborting") except FileNotFoundError: - print(f"Historic AoH for breeding {taxid} not found, aborting") - sys.exit() + sys.exit(f"Historic AoH for breeding {taxid} not found, aborting") try: - with 
RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)) as aoh: - historic_aoh_non_breeding = aoh.sum() + _, historic_aoh_non_breeding = open_layer(historic_aohs_path / nonbreeding_filename) if historic_aoh_non_breeding == 0.0: - print(f"Historic AoH for non breeding {taxid} is zero, aborting") - sys.exit() + sys.exit(f"Historic AoH for non breeding {taxid} is zero, aborting") except FileNotFoundError: - print(f"Historic AoH for non breeding {taxid} not found, aborting") - sys.exit() + sys.exit(f"Historic AoH for non breeding {taxid} not found, aborting") - - if scenario_aohs_path != "nan": - non_breeding_scenario_path = os.path.join(scenario_aohs_path, nonbreeding_filename) - breeding_scenario_path = os.path.join(scenario_aohs_path, breeding_filename) + if scenario_aohs_path.name != "nan": + non_breeding_scenario_path = scenario_aohs_path / nonbreeding_filename + breeding_scenario_path = scenario_aohs_path / breeding_filename else: - non_breeding_scenario_path = "nan" - breeding_scenario_path = "nan" + non_breeding_scenario_path = Path("nan") # nan path is the sentinel from csv inputs + breeding_scenario_path = Path("nan") try: - current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) + current_breeding, current_aoh_breeding = open_layer(current_aohs_path / breeding_filename) except FileNotFoundError: - print(f"Failed to open current breeding {os.path.join(current_aohs_path, breeding_filename)}") - sys.exit() + sys.exit(f"Failed to open current breeding {current_aohs_path / breeding_filename}") try: - current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) + current_non_breeding, current_aoh_non_breeding = open_layer(current_aohs_path / nonbreeding_filename) except FileNotFoundError: - print(f"Failed to open current non breeding {os.path.join(current_aohs_path, nonbreeding_filename)}") - sys.exit() + sys.exit(f"Failed to open current non breeding 
{current_aohs_path / nonbreeding_filename}") try: - scenario_breeding = open_layer_as_float64(breeding_scenario_path) + scenario_breeding: yg.YirgacheffeLayer | float + scenario_breeding, _ = open_layer(breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario_breeding = ConstantLayer(0.0) + scenario_breeding = 0.0 try: - scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) + scenario_non_breeding: yg.YirgacheffeLayer | float + scenario_non_breeding, _ = open_layer(non_breeding_scenario_path) except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario - scenario_non_breeding = ConstantLayer(0.0) - - layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass + scenario_non_breeding = 0.0 - current_aoh_breeding = current_breeding.sum() persistence_breeding = calc_persistence_value( current_aoh_breeding, historic_aoh_breeding, - z_exponent_func_float + exponent, ) - current_aoh_non_breeding = current_non_breeding.sum() persistence_non_breeding = calc_persistence_value( current_aoh_non_breeding, historic_aoh_non_breeding, - z_exponent_func_float + exponent, ) old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) + # + # In general Yirgacheffe can infer the behaviour needed for area intersections based on + # operator, but in this instance we want to force the caclulation to take place for the + # union of the areas involved. 
+ src_layers = [current_breeding, scenario_breeding, current_non_breeding, scenario_non_breeding] + layers = [x for x in src_layers if isinstance(x, yg.YirgacheffeLayer)] + union = yg.layers.RasterLayer.find_union(layers) + for layer in layers: + layer.set_window_for_union(union) + + + new_p_breeding = process_delta_p( current_breeding, scenario_breeding, current_aoh_breeding, historic_aoh_breeding, - z_exponent_func_raster + exponent, ) new_p_non_breeding = process_delta_p( current_non_breeding, scenario_non_breeding, current_aoh_non_breeding, historic_aoh_non_breeding, - z_exponent_func_raster + exponent, ) new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5) - delta_p_layer = new_p_layer - ConstantLayer(old_persistence) + delta_p_layer = new_p_layer - old_persistence - with TemporaryDirectory() as tmpdir: - tmpfile = os.path.join(tmpdir, nonbreeding_filename) - with RasterLayer.empty_raster_layer_like(new_p_breeding, filename=tmpfile) as output: - delta_p_layer.save(output) - shutil.move(tmpfile, os.path.join(output_folder, nonbreeding_filename)) + try: + delta_p_layer.to_geotiff(output_folder / nonbreeding_filename) + except ValueError: + delta_p_layer.pretty_print() + sys.exit(f"Failed to align layers for {taxid}_{season}") - case Season.BREEDING: + case "BREEDING": pass # covered by the nonbreeding case case _: sys.exit(f"Unexpected season for species {taxid}: {season}") +def exponent_type(value: str): + if value == "gompertz": + return value + try: + f = float(value) + except ValueError as exc: + raise argparse.ArgumentTypeError(f"invalid exponent: {value!r}") from exc + if f not in FLOAT_EXPONENTS: + raise argparse.ArgumentTypeError(f"numeric exponent must be one of {sorted(FLOAT_EXPONENTS)}, got {f}") + return f + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument( - '--speciesdata', + '--taxid', + type=int, + required=True, + dest='taxid', + help="Species taxon id", + ) + parser.add_argument( + '--season', type=str, 
- help="Single species/seasonality geojson", required=True, - dest="species_data_path" + dest='season', + help="Seasonality", ) parser.add_argument( '--current_path', - type=str, + type=Path, required=True, dest="current_path", help="path to species current AOH hex" ) parser.add_argument( '--scenario_path', - type=str, + type=Path, required=True, dest="scenario_path", help="path to species scenario AOH hex" ) parser.add_argument( '--historic_path', - type=str, - required=False, + type=Path, + required=True, dest="historic_path", help="path to species historic AOH hex" ) parser.add_argument('--output_path', - type=str, + type=Path, required=True, dest="output_path", help="path to save output csv" ) - parser.add_argument('--z', dest='exponent', type=str, default='0.25') + parser.add_argument( + '--z', + dest='exponent', + type=exponent_type, + default=0.25 + ) args = parser.parse_args() global_code_residents_pixel_ae( - args.species_data_path, + args.taxid, + args.season, args.current_path, args.scenario_path, args.historic_path, diff --git a/local/merge_global_habitat.py b/local/merge_global_habitat.py index 73f247e..aa4d660 100644 --- a/local/merge_global_habitat.py +++ b/local/merge_global_habitat.py @@ -1,32 +1,35 @@ +""" +This script is used to merge localised raster data into a global raster map. + +Specifically it was used originally to merge crosswalked Brazil Mapbiomass data +into the Jung habitat map. 
+""" + import argparse +from contextlib import nullcontext from pathlib import Path -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer -import yirgacheffe.operators as yo - -from osgeo import gdal -gdal.SetCacheMax(32 * 1024 * 1024) +import yirgacheffe as yg +from alive_progress import alive_bar # type: ignore def merge_global_habitat( global_layer_path: Path, local_layer_path: Path, output_layer_path: Path, + show_progress: bool, ) -> None: # Note, we assume naively the local data is higher resolution than the global layer for now - with RasterLayer.layer_from_file(local_layer_path) as local_layer: - with RescaledRasterLayer.layer_from_file( - global_layer_path, - pixel_scale=local_layer.pixel_scale - ) as global_layer: - local_layer.set_window_for_union(global_layer.area) - cleared = local_layer.nan_to_num() - combined = yo.where(cleared != 0, local_layer, global_layer) - with RasterLayer.empty_raster_layer_like( - combined, - filename=output_layer_path, - datatype=local_layer.datatype - ) as result: - combined.parallel_save(result, parallelism=200) + # In a better world we'd work out which is higher res and make everything in that pixel scale + with ( + yg.read_raster(local_layer_path) as local_layer, + yg.read_raster_like(global_layer_path, local_layer, yg.ResamplingMethod.Nearest) as global_layer, + ): + local_layer.set_window_for_union(global_layer.area) + cleared = local_layer.nan_to_num() + combined = yg.where(cleared != 0, local_layer, global_layer) + ctx = alive_bar(manual=True) if show_progress else nullcontext() + with ctx as bar: + combined.to_geotiff(output_layer_path, callback=bar, parallelism=True) def main() -> None: parser = argparse.ArgumentParser() @@ -51,12 +54,21 @@ def main() -> None: dest="output_layer_path", help="Result combined raster path", ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) args = parser.parse_args() 
merge_global_habitat( args.global_layer_path, args.local_layer_path, args.output_layer_path, + args.show_progress, ) if __name__ == "__main__": diff --git a/prepare_layers/build_gaez_hyde.py b/prepare_layers/build_gaez_hyde.py index de0e632..06f1842 100644 --- a/prepare_layers/build_gaez_hyde.py +++ b/prepare_layers/build_gaez_hyde.py @@ -1,13 +1,9 @@ import argparse -import math import os -import tempfile from pathlib import Path import yirgacheffe as yg -import yirgacheffe.operators as yo - -from make_area_map import make_area_map +from snakemake_argparse_bridge import snakemake_compatible # type: ignore DISAGG_CUTOFF = yg.constant(0.95) @@ -23,35 +19,35 @@ def build_gaez_hyde( assert gaez.map_projection == hyde.map_projection projection = gaez.map_projection - with tempfile.TemporaryDirectory() as tmpdir: - area_raster = Path(tmpdir) / "area.tif" - make_area_map(math.fabs(projection.ystep), area_raster) - - with yg.read_narrow_raster(area_raster) as area: - - portional_hyde = (hyde.nan_to_num() * 1000000) / area - portional_gaez = gaez / 100.0 + with yg.area_raster(projection) as area: + portional_hyde = (hyde.nan_to_num() * 1000000) / area + portional_gaez = gaez / 100.0 - # where gaez and hyde disagree (sum greater than disagg cutoff), scale down - uncapped_total = portional_gaez + portional_hyde - # NaNs stop warnings about divide by zero - uncapped_total_with_nan = yo.where(uncapped_total == 0.0, float("nan"), uncapped_total) + # where gaez and hyde disagree (sum greater than disagg cutoff), scale down + uncapped_total = portional_gaez + portional_hyde + # NaNs stop warnings about divide by zero + uncapped_total_with_nan = yg.where(uncapped_total == 0.0, float("nan"), uncapped_total) - # calculate ag-perc scalars - total = yo.where( - uncapped_total_with_nan >= DISAGG_CUTOFF, - DISAGG_CUTOFF - (yg.constant(1) / yo.exp(uncapped_total_with_nan * 2)), - uncapped_total_with_nan, - ) + # calculate ag-perc scalars + total = yg.where( + uncapped_total_with_nan >= 
DISAGG_CUTOFF, + DISAGG_CUTOFF - (yg.constant(1) / yg.exp(uncapped_total_with_nan * 2)), + uncapped_total_with_nan, + ) - gaez_ratio = portional_gaez / uncapped_total_with_nan - gaez_values = total * gaez_ratio - gaez_values.to_geotiff(output_dir_path / "crop.tif") + gaez_ratio = portional_gaez / uncapped_total_with_nan + gaez_values = total * gaez_ratio + gaez_values.to_geotiff(output_dir_path / "crop.tif") - hyde_ratio = portional_hyde / uncapped_total_with_nan - hyde_values = total * hyde_ratio - hyde_values.to_geotiff(output_dir_path / "pasture.tif") + hyde_ratio = portional_hyde / uncapped_total_with_nan + hyde_values = total * hyde_ratio + hyde_values.to_geotiff(output_dir_path / "pasture.tif") +@snakemake_compatible(mapping={ + "gaez_path": "input.gaez_raster", + "hyde_path": "input.hyde_raster", + "output_dir_path": "params.output_dir", +}) def main() -> None: parser = argparse.ArgumentParser(description="Generate a combined GAEZ and Hyde layers") parser.add_argument( diff --git a/prepare_layers/generate_crosswalk.py b/prepare_layers/generate_crosswalk.py index e9fcfd9..9e909e5 100644 --- a/prepare_layers/generate_crosswalk.py +++ b/prepare_layers/generate_crosswalk.py @@ -1,8 +1,10 @@ import argparse import os +from pathlib import Path import pandas as pd from iucn_modlib.translator import toJung +from snakemake_argparse_bridge import snakemake_compatible # Take from https://www.iucnredlist.org/resources/habitat-classification-scheme @@ -31,11 +33,9 @@ ] def generate_crosswalk( - output_filename: str, + output_filename: Path, ) -> None: - output_dir, _ = os.path.split(output_filename) - if output_dir: - os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_filename.parent, exist_ok=True) res = [] for iucn_code in IUCN_HABITAT_CODES: @@ -48,11 +48,14 @@ def generate_crosswalk( df = pd.DataFrame(res, columns=["code", "value"]) df.to_csv(output_filename, index=False) +@snakemake_compatible(mapping={ + "output_filename": "output.crosswalk", +}) def 
main() -> None: parser = argparse.ArgumentParser(description="Generate a Jung crosswalk table as CSV.") parser.add_argument( '--output', - type=str, + type=Path, help='Path where final crosswalk should be stored', required=True, dest='output_filename', diff --git a/prepare_layers/make_arable_map.py b/prepare_layers/make_arable_map.py index ab7ca93..20aa6da 100644 --- a/prepare_layers/make_arable_map.py +++ b/prepare_layers/make_arable_map.py @@ -1,46 +1,44 @@ import argparse +import os +import shutil from pathlib import Path from typing import Optional -import yirgacheffe.operators as yo +import yirgacheffe as yg from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) JUNG_ARABLE_CODE = 1401 JUNG_URBAN_CODE = 1405 def make_arable_map( - current_path: Path, + current_dir_path: Path, output_path: Path, concurrency: Optional[int], show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: + os.makedirs(output_path, exist_ok=True) - arable_map = yo.where(current != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, JUNG_URBAN_CODE) + # In this scenario all land that isn't urban is covered to arable + urban_filename = current_dir_path / f"lcc_{JUNG_URBAN_CODE}.tif" + new_arable_filename = output_path / f"lcc_{JUNG_ARABLE_CODE}.tif" - with RasterLayer.empty_raster_layer_like( - arable_map, - filename=output_path, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - arable_map.parallel_save(result, callback=bar, parallelism=concurrency) - else: - arable_map.parallel_save(result, parallelism=concurrency) + shutil.copy(urban_filename, output_path) + with yg.read_raster(urban_filename) as urban: + new_arable = 1.0 - urban + if show_progress: + with alive_bar(manual=True) as bar: + new_arable.to_geotiff(new_arable_filename, callback=bar, parallelism=concurrency) + else: + new_arable.to_geotiff(new_arable_filename, 
parallelism=concurrency) def main() -> None: parser = argparse.ArgumentParser(description="Generate the arable scenario map.") parser.add_argument( '--current', type=Path, - help='Path of current map', + help='Path of fractional current maps', required=True, - dest='current_path', + dest='current_dir_path', ) parser.add_argument( '--output', @@ -68,7 +66,7 @@ def main() -> None: args = parser.parse_args() make_arable_map( - args.current_path, + args.current_dir_path, args.results_path, args.concurrency, args.show_progress, diff --git a/prepare_layers/make_area_map.py b/prepare_layers/make_area_map.py deleted file mode 100644 index 3a2ca29..0000000 --- a/prepare_layers/make_area_map.py +++ /dev/null @@ -1,91 +0,0 @@ -import argparse -import math -from pathlib import Path - -import numpy as np -from yirgacheffe.layers import RasterLayer # type: ignore -from yirgacheffe.operators import DataType # type: ignore -from yirgacheffe.window import Area, PixelScale # type: ignore - -# Taken from https://gis.stackexchange.com/questions/127165/more-accurate-way-to-calculate-area-of-rasters -def area_of_pixel(pixel_size: float, center_lat: float) -> float: - """Calculate m^2 area of a wgs84 square pixel. - - Adapted from: https://gis.stackexchange.com/a/127327/2397 - - Parameters: - pixel_size (float): length of side of pixel in degrees. - center_lat (float): latitude of the center of the pixel. Note this - value +/- half the `pixel-size` must not exceed 90/-90 degrees - latitude or an invalid area will be calculated. - - Returns: - Area of square pixel of side length `pixel_size` centered at - `center_lat` in m^2. 
- - """ - a = 6378137 # meters - b = 6356752.3142 # meters - e = math.sqrt(1 - (b/a)**2) - area_list = [] - for f in [center_lat+pixel_size/2, center_lat-pixel_size/2]: - zm = 1 - e*math.sin(math.radians(f)) - zp = 1 + e*math.sin(math.radians(f)) - area_list.append( - math.pi * b**2 * ( - math.log(zp/zm) / (2*e) + - math.sin(math.radians(f)) / (zp*zm))) - return pixel_size / 360. * (area_list[0] - area_list[1]) - -def make_area_map( - pixel_scale: float, - output_path: Path, -) -> None: - pixels = [0.0,] * math.floor(90.0 / pixel_scale) - for i in range(len(pixels)): # pylint: disable=C0200 - y = (i + 0.5) * pixel_scale - pixels[i] = area_of_pixel(pixel_scale, y) - - allpixels = np.rot90(np.array([list(reversed(pixels)) + pixels])) - - area = Area( - left=0, - right=pixel_scale, - top=(math.floor(90 / pixel_scale) * pixel_scale), - bottom=(math.floor(90 / pixel_scale) * pixel_scale * -1.0) - ) - with RasterLayer.empty_raster_layer( - area, - PixelScale(pixel_scale, pixel_scale * -1.0), - DataType.Float32, - filename=output_path - ) as res: - assert res.window.xsize == 1 - res._dataset.WriteArray(allpixels, 0, 0) # pylint: disable=W0212 - - -def main() -> None: - parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") - parser.add_argument( - "--scale", - type=float, - required=True, - dest="pixel_scale", - help="Output pixel scale value." - ) - parser.add_argument( - "--output", - type=str, - required=True, - dest="output_path", - help="Destination file for area raster." 
def make_current_maps(
    jung_path: Path,
    update_masks_path: Optional[Path],
    crosswalk_path: Path,
    output_dir_path: Path,
    parallelism: Optional[int],
    show_progress: bool,
    sentinel_path: Path | None,
) -> None:
    """Split the (optionally patched) Jung habitat map into one raster per land cover class.

    Each output is a Float32 per-class raster named ``lcc_<code>.tif`` in
    ``output_dir_path``. Artificial classes (looked up via the crosswalk) keep
    their level 2 code; every other class is rounded down to its level 1 code.
    """
    os.makedirs(output_dir_path, exist_ok=True)
    print(f"Using {parallelism} workers")

    # Optional override rasters that are layered on top of the base Jung map.
    if update_masks_path is not None:
        mask_layers = [
            yg.read_raster(x) for x in sorted(list(update_masks_path.glob("*.tif")))
        ]
    else:
        mask_layers = []

    with yg.read_raster(jung_path) as jung:
        crosswalk = load_crosswalk_table(crosswalk_path)
        map_preserve_code = list(itertools.chain.from_iterable(
            [crosswalk[x] for x in IUCN_CODE_ARTIFICAL]
        ))

        # Apply each update mask in turn: non-zero mask pixels win over the base map.
        updated_jung = jung
        for update in mask_layers:
            updated_jung = yg.where(update != 0, update, updated_jung)

        # Artificial classes stay at level 2; everything else collapses to level 1 (x100).
        current_map = yg.where(
            updated_jung.isin(map_preserve_code),
            updated_jung,
            (yg.floor(updated_jung / 100) * 100),
        )

        print("Calculating unique land cover types...")
        lcc_values = current_map.unique()

        for lcc in lcc_values:
            # Seems there are some NaN values in Jung
            if math.isnan(lcc):
                continue
            print(f"Processing {lcc}...")
            class_fraction = (current_map == lcc).astype(yg.DataType.Float32)
            progress_ctx = alive_bar(manual=True, title=str(lcc)) if show_progress else nullcontext()
            with progress_ctx as bar:
                class_fraction.to_geotiff(
                    output_dir_path / f"lcc_{int(lcc)}.tif",
                    callback=bar,
                    parallelism=parallelism,
                )

    # This script generates a bunch of rasters, but snakemake needs one
    # output to say when this is done, so if we're in snakemake mode we touch
    # a sentinel file to signal completion. One day this should be another decorator.
    if sentinel_path is not None:
        os.makedirs(sentinel_path.parent, exist_ok=True)
        sentinel_path.touch()
def make_diff_map(
    current_path: Path,
    scenario_path: Path,
    output_path: Path,
    parallelism: None | int,
    show_progress: bool,
) -> None:
    """Write a raster of per-pixel area that differs between two scenarios.

    Both inputs are directories of fractional per-class rasters named
    ``lcc_<code>.tif``. A pixel counts as changed when any habitat class
    differs between current and scenario; the change mask is multiplied by
    per-pixel area so the output is in area units.

    Raises:
        FileNotFoundError: if neither directory contains any of the expected
            per-class rasters. (Previously an empty layer list was passed to
            ``yg.any``, failing later with an opaque error.)
    """
    layers = []
    for habitat in POSSIBLE_HABITAT_CLASSES:
        current_habitat_filename = current_path / f"lcc_{habitat}.tif"
        scenario_habitat_filename = scenario_path / f"lcc_{habitat}.tif"

        # Skip classes absent from both scenarios; a class present on only one
        # side is compared against a constant zero layer.
        if not current_habitat_filename.exists() and not scenario_habitat_filename.exists():
            continue
        current_layer = yg.read_raster(current_habitat_filename) if current_habitat_filename.exists() \
            else yg.constant(0.0)
        scenario_layer = yg.read_raster(scenario_habitat_filename) if scenario_habitat_filename.exists() \
            else yg.constant(0.0)
        habitat_diff = current_layer != scenario_layer
        layers.append(habitat_diff)

    if not layers:
        raise FileNotFoundError(
            f"No lcc_*.tif rasters found in {current_path} or {scenario_path}"
        )

    diff = yg.any(layers)
    area = yg.area_raster(diff.map_projection)
    scaled_diff = diff * area
    ctx = alive_bar(manual=True) if show_progress else nullcontext()
    with ctx as bar:
        scaled_diff.to_geotiff(output_path, callback=bar, parallelism=parallelism)
parser.add_argument( - "--scale", - type=float, - required=True, - dest="pixel_scale", - help="Output pixel scale value." - ) - parser.add_argument( - '--projection', - type=str, - help="Target projection", - required=False, - dest="target_projection", - default=None - ) parser.add_argument( '--output', type=Path, help='Path where final map should be stored', required=True, - dest='results_path', + dest='output_path', ) parser.add_argument( '-j', @@ -133,7 +74,7 @@ def main() -> None: help='Number of concurrent threads to use for calculation.', required=False, default=None, - dest='concurrency', + dest='parallelism', ) parser.add_argument( '-p', @@ -148,11 +89,8 @@ def main() -> None: make_diff_map( args.current_path, args.scenario_path, - args.area_path, - args.pixel_scale, - args.target_projection, - args.results_path, - args.concurrency, + args.output_path, + args.parallelism, args.show_progress, ) diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 0b7e876..8036c06 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -1,18 +1,18 @@ import argparse -import math +import multiprocessing import os import resource import sys import time from pathlib import Path -from multiprocessing import Manager, Process, cpu_count +from multiprocessing import Process, cpu_count from queue import Queue from typing import NamedTuple from osgeo import gdal import numpy as np import yirgacheffe as yg -from yirgacheffe.layers import RasterLayer +from snakemake_argparse_bridge import snakemake_compatible # type: ignore gdal.SetCacheMax(4 * 1024 * 1024 * 1024) @@ -22,91 +22,218 @@ # Codes not to touch. 
# PNV codes
# array([ 100,  200,  300,  400,  500,  600,  800,  900, 1100, 1200], dtype=uint16)

class TileInfo(NamedTuple):
    """Info about a tile to process"""
    x_position : int
    y_position : int
    width : int
    height : int
    crop_target : float     # target mean fractional crop coverage for the tile
    pasture_target : float  # target mean fractional pasture coverage for the tile

def balance_crop_and_pasture_differences(
    crop_diff: float,
    pasture_diff: float,
    lcc_data_map: dict[int,np.ndarray],
) -> tuple[float,float]:
    """
    If we remove one type of agricultural land but expand another, keep them in the same area where possible.

    One thing we know is that reductions are always achievable, as the difference is generated as
    (GAEZ and HYDE - Jung), so if we have a negative value the initial state is enough to cover that.

    Mutates the source and destination rasters in lcc_data_map in place and
    returns the residual (crop_diff, pasture_diff) still to be applied.
    """
    # Either both are a reduction or both an increase, or at least one is null, so no
    # balancing required.
    if crop_diff * pasture_diff >= 0:
        return crop_diff, pasture_diff

    # If balanced they will cancel each other out, otherwise
    # we will move the smaller difference from one to the other.
    transfer_amount = min(abs(crop_diff), abs(pasture_diff))

    if crop_diff > 0:
        # Crop increasing, pasture decreasing
        src_lcc, dst_lcc = PASTURE_CODE, CROP_CODE
    else:
        # Pasture increasing, crop decreasing
        src_lcc, dst_lcc = CROP_CODE, PASTURE_CODE

    src_raster = lcc_data_map[src_lcc]
    dst_raster = lcc_data_map[dst_lcc]

    src_coverage = src_raster.sum() / src_raster.size
    if src_coverage == 0:
        if crop_diff > 0:
            raise ValueError(f"no cells in pasture {src_raster.sum()}, but pasture diff is -ve {pasture_diff}")
        raise ValueError(f"no cells in crop {src_raster.sum()}, but crop diff is -ve {crop_diff}")

    # Move the same *fraction* of every source cell's coverage, so that the
    # tile-wide mean moved is exactly transfer_amount and no cell can go
    # negative. (The previous code subtracted a flat per-cell amount, which
    # removed the wrong total and could drive sparse cells below zero.)
    # This is safe because our simplifying assumption guarantees transfer <= source_coverage
    per_cell_factor = transfer_amount / src_coverage
    transferred = src_raster * per_cell_factor
    src_raster -= transferred
    dst_raster += transferred

    # Shrink both residual diffs towards zero by the transferred amount.
    new_crop_diff = crop_diff + transfer_amount * np.sign(pasture_diff)
    new_pasture_diff = pasture_diff + transfer_amount * np.sign(crop_diff)
    return new_crop_diff, new_pasture_diff
def remove_land_cover(
    lcc_code: int,
    diff: float,
    pnv: np.ndarray,
    lcc_data_map: dict[int,np.ndarray],
) -> None:
    """Reduce the fractional coverage of one agricultural class, returning the
    freed area to each cell's potential natural vegetation (PNV) class.

    diff is the (non-positive) change in mean fractional coverage over the
    tile. Mutates the arrays in lcc_data_map in place.
    """
    assert diff <= 0
    required_reduction = abs(diff)

    agri_raster = lcc_data_map[lcc_code]
    removal_mask = agri_raster > 0

    current_coverage = agri_raster.sum() / agri_raster.size
    if current_coverage == 0:
        return

    # Remove the same fraction of every occupied cell so the tile-wide mean
    # drops by required_reduction, capped so no cell can go negative.
    per_cell_fraction = min(required_reduction / current_coverage, 1.0)

    removed_grid = agri_raster * (removal_mask * per_cell_fraction)
    agri_raster -= removed_grid

    # Reallocate to PNV classes - note we assume this does not include the agricultural classes
    # so as to not undo what we just did!
    for lcc, lcc_data in lcc_data_map.items():
        pnv_match = (pnv == lcc) & removal_mask
        lcc_data[pnv_match] += removed_grid[pnv_match]

def add_land_cover(
    lcc_code: int,
    diff: float,
    lcc_data_map: dict[int,np.ndarray],
) -> None:
    """Increase the fractional coverage of one agricultural class by diff (mean
    coverage over the tile), taking area from whatever currently occupies the
    eligible cells. Mutates the arrays in lcc_data_map in place.
    """
    assert diff >= 0

    agri_raster = lcc_data_map[lcc_code]

    # Find areas we can put the new data. This is anywhere we don't already
    # have agricultural land, and other places unlikely to be converted (cities, lakes, etc.)
    # We know that there should be no partial cells involving crop/pasture at this stage
    # because of the balancing we did initially.
    excluded_codes = [CROP_CODE, PASTURE_CODE] + PRESERVE_CODES
    eligible_mask = np.ones_like(agri_raster, dtype=bool)
    for excluded_code in excluded_codes:
        if excluded_code in lcc_data_map:
            eligible_mask &= (lcc_data_map[excluded_code] == 0)

    eligible_count = eligible_mask.sum()
    if eligible_count == 0:
        return

    # Spread the requested mean coverage evenly over the eligible cells.
    eligible_fraction = eligible_count / eligible_mask.size
    per_cell_addition = min(diff / eligible_fraction, 1.0)

    agri_raster[eligible_mask] += per_cell_addition

    # Take the added coverage away from the classes currently in those cells,
    # proportionally to their share. Assuming the class fractions in each cell
    # sum to 1 (TODO confirm against the raster generation), scaling every
    # other class by (1 - per_cell_addition) removes exactly per_cell_addition
    # in total. (The previous code subtracted the full per_cell_addition from
    # *each* occupied class, which over-removed when a cell held several
    # classes and could drive values negative.)
    for lcc, lcc_data in lcc_data_map.items():
        if lcc == lcc_code:
            continue
        lcc_data[eligible_mask] *= (1.0 - per_cell_addition)

def process_tile(
    current_maps: dict[int,yg.YirgacheffeLayer],
    pnv: yg.YirgacheffeLayer,
    tile: TileInfo,
) -> dict[int,np.ndarray]:
    """Adjust one tile's per-class coverage towards its crop/pasture targets.

    Returns the (possibly modified) per-class arrays for the tile.
    """
    lcc_data_map = {
        lcc: current_map.read_array(tile.x_position, tile.y_position, tile.width, tile.height)
        for lcc, current_map in current_maps.items()
    }

    # No targets for this tile - return the data unmodified.
    if np.isnan(tile.crop_target) and np.isnan(tile.pasture_target):
        return lcc_data_map

    for current in current_maps.values():
        assert current.map_projection == pnv.map_projection
        assert current.area == pnv.area

    # How far is each class from its target mean coverage?
    if not np.isnan(tile.crop_target):
        crop_data = lcc_data_map[CROP_CODE]
        crop_diff = tile.crop_target - (crop_data.sum() / crop_data.size)
    else:
        crop_diff = 0
    if not np.isnan(tile.pasture_target):
        pasture_data = lcc_data_map[PASTURE_CODE]
        pasture_diff = tile.pasture_target - (pasture_data.sum() / pasture_data.size)
    else:
        pasture_diff = 0
    if (crop_diff == 0) and (pasture_diff == 0):
        return lcc_data_map

    crop_diff, pasture_diff = balance_crop_and_pasture_differences(
        crop_diff,
        pasture_diff,
        lcc_data_map,
    )

    # Order the changes by removals first then additions. In random sampling this was
    # important, but not so with a fractional approach. However, we need some consistent
    # ordering so we leave this in for consistency.
    diffs = [
        (crop_diff, CROP_CODE),
        (pasture_diff, PASTURE_CODE),
    ]
    diffs.sort(key=lambda a: a[0])

    pnv_data = None
    for diff_value, habitat_code in diffs:
        if diff_value == 0 or np.isnan(diff_value):
            continue
        if diff_value < 0:
            # PNV is only needed for removals, so read it lazily.
            if pnv_data is None:
                pnv_data = pnv.read_array(tile.x_position, tile.y_position, tile.width, tile.height)
            remove_land_cover(habitat_code, diff_value, pnv_data, lcc_data_map)
        else:
            add_land_cover(habitat_code, diff_value, lcc_data_map)

    return lcc_data_map
def process_tile_concurrently(
    current_lvl1_path: Path,
    pnv_path: Path,
    input_queue: Queue,
    result_queues: dict[int,Queue],
) -> None:
    """Worker loop: pull tiles from input_queue, rebalance them, and push the
    per-class results onto the matching per-class output queues.

    A None on the input queue terminates the worker, which then forwards a
    None to every output queue so the assemblers can count completed workers.
    """
    current_maps = {
        int(filename.stem.split('_')[1]): yg.read_raster(filename)
        for filename in current_lvl1_path.glob("lcc_*.tif")
    }
    reference_layer = next(iter(current_maps.values()))
    with yg.read_raster(pnv_path) as pnv:
        # Clip the PNV layer to the same region as the class rasters.
        pnv.set_window_for_intersection(reference_layer.area)
        while (tile := input_queue.get()) is not None:
            per_class_data = process_tile(current_maps, pnv, tile)
            for lcc, data in per_class_data.items():
                result_queues[lcc].put((tile, data.tobytes()))
    for queue in result_queues.values():
        queue.put(None)

def build_tile_list(
    current_lvl1_path: Path,
    crop_adjustment_path: Path,
    pasture_adjustment_path: Path,
) -> list[TileInfo]:
    """Build one TileInfo per pixel of the coarse crop/pasture adjustment
    rasters, each mapped to the corresponding block of pixels in the fine
    per-class land cover rasters."""
    tiles = []

    with yg.read_raster(next(current_lvl1_path.glob("*.tif"))) as example:
        fine_width, fine_height = example.window.xsize, example.window.ysize

    with (
        yg.read_raster(crop_adjustment_path) as crop,
        yg.read_raster(pasture_adjustment_path) as pasture,
    ):
        assert crop.window == pasture.window
        coarse_width, coarse_height = crop.window.xsize, crop.window.ysize

        x_scale = fine_width / coarse_width
        y_scale = fine_height / coarse_height

        # Pixel boundaries of each coarse cell in fine-raster coordinates.
        x_steps = [round(i * x_scale) for i in range(coarse_width)] + [fine_width]
        y_steps = [round(i * y_scale) for i in range(coarse_height)] + [fine_height]

        for y in range(coarse_height):
            crop_row = crop.read_array(0, y, coarse_width, 1)
            pasture_row = pasture.read_array(0, y, coarse_width, 1)
            for x in range(coarse_width):
                tiles.append(TileInfo(
                    x_steps[x],
                    y_steps[y],
                    x_steps[x+1] - x_steps[x],
                    y_steps[y+1] - y_steps[y],
                    crop_row[0][x],
                    pasture_row[0][x],
                ))
    return tiles
def assemble_map(
    lcc: int,
    current_lvl1_path: Path,
    output_path: Path,
    result_queue: Queue,
    sentinal_count: int,
) -> None:
    """Consume processed tiles for one land cover class and write them into a
    fresh raster shaped like the original class raster.

    Runs until sentinal_count None sentinels (one per worker) have arrived.
    """
    os.makedirs(output_path, exist_ok=True)
    with yg.read_raster(current_lvl1_path / f"lcc_{lcc}.tif") as current_map:
        dtype = current_map.read_array(0, 0, 1, 1).dtype
        # Fix: manage the output layer with a context manager so the GDAL
        # dataset is flushed and closed when assembly completes - previously it
        # was never closed, risking truncated output.
        with yg.layers.RasterLayer.empty_raster_layer_like(
            current_map,
            filename=output_path / f"lcc_{lcc}.tif",
            threads=16,
        ) as new_map:
            # A cheat as we don't have a neat API for this on yirgacheffe yet
            band = new_map._dataset.GetRasterBand(1)  # pylint: disable=W0212

            count = 0
            while True:
                result : tuple[TileInfo,bytearray] | None = result_queue.get()
                if result is None:
                    sentinal_count -= 1
                    if sentinal_count == 0:
                        break
                    continue

                count += 1
                tile, rawdata = result
                data = np.reshape(
                    np.frombuffer(rawdata, dtype=dtype),
                    (tile.height, tile.width),
                )
                band.WriteArray(data, tile.x_position, tile.y_position)
                if count % 1000 == 0:
                    print(f"{lcc}: assembled {count} tiles")
def pipeline_source(
    current_lvl1_path: Path,
    crop_adjustment_path: Path,
    pasture_adjustment_path: Path,
    source_queue: Queue,
    sentinal_count: int,
) -> None:
    """Feed every tile to the worker queue, then send one None per worker so
    they all terminate."""
    tiles = build_tile_list(
        current_lvl1_path,
        crop_adjustment_path,
        pasture_adjustment_path,
    )
    print(f"There are {len(tiles)} tiles")
    for tile in tiles:
        source_queue.put(tile)
    for _ in range(sentinal_count):
        source_queue.put(None)

def get_lcc_list(current_lvl1_path: Path) -> list[int]:
    """Return the land cover class codes present as per-class rasters.

    Only matches the ``lcc_<code>.tif`` naming scheme (the previous bare
    ``*.tif`` glob crashed on any unrelated tif, and is inconsistent with the
    glob used by process_tile_concurrently) and sorts the codes so the result
    is deterministic regardless of filesystem ordering.
    """
    return sorted(
        int(raster.stem.split('_')[1])
        for raster in current_lvl1_path.glob("lcc_*.tif")
    )

def make_food_current_map(
    current_lvl1_path: Path,
    pnv_path: Path,
    crop_adjustment_path: Path,
    pasture_adjustment_path: Path,
    output_path: Path,
    processes_count: int,
) -> None:
    """Orchestrate the tile pipeline: one source process, processes_count
    workers, and one assembler process per land cover class."""
    # This pipeline holds many files and queues open at once,
    # so we need to adjust the ulimit, which is quite low by default
    _, max_fd_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (max_fd_limit, max_fd_limit))

    os.makedirs(output_path.parent, exist_ok=True)

    lcc_list = get_lcc_list(current_lvl1_path)
    result_queues: dict[int,multiprocessing.queues.Queue] = {
        lcc: multiprocessing.Queue(maxsize=10) for lcc in lcc_list
    }

    # One assembler per class, each writing its own output raster.
    assembly_processes = [
        Process(target=assemble_map, args=(
            lcc,
            current_lvl1_path,
            output_path,
            queue,
            processes_count,
        )) for lcc, queue in result_queues.items()
    ]
    for assembly_worker in assembly_processes:
        assembly_worker.start()

    source_queue: multiprocessing.queues.Queue = multiprocessing.Queue(maxsize=1000)
    workers = [Process(target=process_tile_concurrently, args=(
        current_lvl1_path,
        pnv_path,
        source_queue,
        result_queues,
    )) for _ in range(processes_count)]
    for worker_process in workers:
        worker_process.start()

    source_worker = Process(target=pipeline_source, args=(
        current_lvl1_path,
        crop_adjustment_path,
        pasture_adjustment_path,
        source_queue,
        processes_count,
    ))
    source_worker.start()

    # Babysit the children: if any process fails, kill the rest and propagate
    # its exit code.
    processes = workers + assembly_processes
    processes.append(source_worker)
    while processes:
        candidates = [x for x in processes if not x.is_alive()]
        for candidate in candidates:
            candidate.join()
            if candidate.exitcode:
                for victim in processes:
                    victim.kill()
                sys.exit(candidate.exitcode)
            processes.remove(candidate)
        time.sleep(0.1)
dest="pasture_adjustment_path", ) - parser.add_argument( - "--seed", - type=int, - required=True, - help="Seed the random number generator", - dest="seed", - ) parser.add_argument( '--output', type=Path, @@ -304,7 +444,7 @@ def main() -> None: "-j", type=int, required=False, - default=round(cpu_count() / 1), + default=cpu_count() // 2, dest="processes_count", help="Number of concurrent threads to use." ) @@ -315,7 +455,6 @@ def main() -> None: args.pnv_path, args.crop_adjustment_path, args.pasture_adjustment_path, - args.seed, args.output_path, args.processes_count, ) diff --git a/prepare_layers/make_restore_map.py b/prepare_layers/make_restore_map.py index cb8826e..a273b5f 100644 --- a/prepare_layers/make_restore_map.py +++ b/prepare_layers/make_restore_map.py @@ -1,15 +1,13 @@ import argparse import itertools +import os +from contextlib import ExitStack, nullcontext from pathlib import Path from typing import Dict, List, Optional import pandas as pd -import yirgacheffe.operators as yo +import yirgacheffe as yg from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer, RescaledRasterLayer - -from osgeo import gdal -gdal.SetCacheMax(1 * 1024 * 1024 * 1024) # From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV IUCN_CODE_REPLACEMENTS = [ @@ -30,33 +28,46 @@ def load_crosswalk_table(table_file_name: Path) -> Dict[str,List[int]]: result[row.code] = [int(row.value)] return result - def make_restore_map( pnv_path: Path, - current_path: Path, + current_dir_path: Path, crosswalk_path: Path, output_path: Path, - concurrency: Optional[int], + parallelism: Optional[int], show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: - with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: - crosswalk = load_crosswalk_table(crosswalk_path) + os.makedirs(output_path, exist_ok=True) + + crosswalk = load_crosswalk_table(crosswalk_path) + + 
def make_restore_map(
    pnv_path: Path,
    current_dir_path: Path,
    crosswalk_path: Path,
    output_path: Path,
    parallelism: Optional[int],
    show_progress: bool,
) -> None:
    """Build per-class rasters for the restore scenario, where arable and
    pasture coverage is returned to each cell's potential natural vegetation
    (PNV) class.

    Raises:
        FileNotFoundError: if none of the arable/pasture rasters exist in
            current_dir_path. (Previously this failed with a bare IndexError
            when indexing an empty list of replacement maps.)
    """
    os.makedirs(output_path, exist_ok=True)

    crosswalk = load_crosswalk_table(crosswalk_path)

    map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS]))
    ideal_map_replacement_filenames = [current_dir_path / f"lcc_{code}.tif" for code in map_replacement_codes]
    map_replacement_filenames = [path for path in ideal_map_replacement_filenames if path.exists()]
    if not map_replacement_filenames:
        raise FileNotFoundError(f"No agricultural class rasters found in {current_dir_path}")

    with ExitStack() as stack:
        replacement_maps = [stack.enter_context(yg.read_raster(filename)) for filename in map_replacement_filenames]
        # Total agricultural coverage per cell - this is what gets restored.
        replacement_total = yg.sum(replacement_maps)

        # all the ones we expect to be left with, but not the ones we're removing
        current_raster_filenames = [
            path for path in current_dir_path.glob("*.tif") if path not in map_replacement_filenames
        ]

        # Read the PNV at the same scale as the other maps
        with yg.read_raster_like(pnv_path, replacement_maps[0], yg.ResamplingMethod.Nearest) as pnv:
            for filename in current_raster_filenames:
                lcc_code = int(filename.stem.split('_')[1])
                ctx = alive_bar(manual=True, title=str(lcc_code)) if show_progress else nullcontext()
                with ctx as bar:
                    with yg.read_raster(filename) as layer:
                        # Cells whose PNV is this class gain the removed
                        # agricultural coverage; clamp to 1.0 fractional cover.
                        updated_layer = layer + (replacement_total * (pnv == lcc_code).astype(yg.DataType.Float32))
                        capped_updated_layer = yg.where(updated_layer > 1, 1.0, updated_layer)
                        capped_updated_layer.to_geotiff(
                            output_path / f"lcc_{lcc_code}.tif",
                            callback=bar,
                            parallelism=parallelism,
                        )
main() -> None: parser.add_argument( '--current', type=Path, - help='Path of current map', + help='Path of current maps', required=True, - dest='current_path', + dest='current_dir_path', ) parser.add_argument( '--crosswalk', @@ -91,10 +102,10 @@ def main() -> None: parser.add_argument( '-j', type=int, - help='Number of concurrent threads to use for calculation.', + help='Number of parallel threads to use for calculation.', required=False, default=None, - dest='concurrency', + dest='parallelism', ) parser.add_argument( '-p', @@ -108,10 +119,10 @@ def main() -> None: make_restore_map( args.pnv_path, - args.current_path, + args.current_dir_path, args.crosswalk_path, args.results_path, - args.concurrency, + args.parallelism, args.show_progress, ) diff --git a/requirements.txt b/requirements.txt index 0da34ab..eed4e1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,12 +14,18 @@ rasterio requests alive-progress matplotlib -yirgacheffe -aoh +yirgacheffe>=1.12.7 +aoh>=2.0.5 gdal[numpy] git+https://github.com/quantifyearth/iucn_modlib.git git+https://github.com/quantifyearth/seasonality + +# Snakemake workflow management +snakemake>=8.0 +# We require a bug fix on this package that isn't yet upstream +snakemake-argparse-bridge @ git+https://github.com/mdales/snakemake-argparse-bridge@77ac272d0bc22c370aeb9d1670882fd9dac9b098 + # developer requirements pytest pylint @@ -28,3 +34,4 @@ pandas-stubs types-requests yirgacheffe[dev] aoh[dev] +snakefmt diff --git a/scripts/generate_food_map.sh b/scripts/generate_food_map.sh index 9d93a88..ea8cd27 100755 --- a/scripts/generate_food_map.sh +++ b/scripts/generate_food_map.sh @@ -8,6 +8,26 @@ set -e set -x + +# We know we use two Go tools, so add go/bin to our path as in slurm world they're likely +# to be installed locally +export PATH="${PATH}":"${HOME}"/go/bin +if ! 
hash reclaimer 2>/dev/null; then + echo "Please ensure reclaimer is available" + exit 1 +fi + +# Detect if we're running under SLURM +if [[ -n "${SLURM_JOB_ID}" ]]; then + # Slurm users will probably need to customise this + # shellcheck disable=SC1091 + source "${HOME}"/venvs/life/bin/activate + cd "${HOME}"/dev/life + PROCESS_COUNT="${SLURM_JOB_CPUS_PER_NODE}" +else + PROCESS_COUNT=$(nproc --all) +fi + if [ -z "${DATADIR}" ]; then echo "Please specify $DATADIR" exit 1 @@ -18,79 +38,89 @@ if [ -z "${VIRTUAL_ENV}" ]; then exit 1 fi -if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then - echo "LIFE Level 1 current map required" - exit 1 +if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then + if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then + reclaimer zenodo --zenodo_id 4058819 \ + --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ + --extract \ + --output "${DATADIR}"/habitat/jung_l2_raw.tif + fi + + if [ ! -d "${DATADIR}"/habitat/lvl2_changemasks_ver004 ]; then + reclaimer zenodo --zenodo_id 4058819 \ + --filename lvl2_changemasks_ver004.zip \ + --extract \ + --output "${DATADIR}"/habitat/ + fi + + if [ ! -f "${DATADIR}"/100m/jung_current/.sentinel ]; then + python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ + --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ + --crosswalk "${DATADIR}"/crosswalk.csv \ + --output "${DATADIR}"/100m/jung_current \ + --sentinel "${DATADIR}"/100m/jung_current/.sentinel \ + -j "${PROCESS_COUNT}" + fi fi if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then - echo "Jung PNV map required" - exit 1 + reclaimer zenodo --zenodo_id 4038749 \ + --filename pnv_lvl1_004.zip \ + --extract \ + --output "${DATADIR}"/habitat/pnv_raw.tif fi -if [ ! -d "${DATADIR}"/food ]; then - mkdir -p "${DATADIR}"/food +if [ ! 
-f "${DATADIR}"/habitat/pnv_100m.tif ]; then
+    # In theory we don't need to do this, as Yirgacheffe can rescale dynamically
+    # but in practice it's faster if we just do this once like this, at the cost
+    # of some extra storage requirements
+    gdalwarp -tr 0.000898315284120 -0.000898315284120 \
+        -r near \
+        -tap \
+        -multi -wo NUM_THREADS=ALL_CPUS \
+        -co COMPRESS=LZW -co NUM_THREADS=ALL_CPUS \
+        "${DATADIR}"/habitat/pnv_raw.tif \
+        "${DATADIR}"/habitat/pnv_100m.tif
 fi
 
 # Get GAEZ data
-if [ ! -f "${DATADIR}"/food/GLCSv11_02_5m.tif ]; then
-    if [ ! -f "${DATADIR}"/food/LR.zip ]; then
-        curl -o "${DATADIR}"/food/LR.zip https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip
+if [ ! -f "${DATADIR}"/habitat/GLCSv11_02_5m.tif ]; then
+    if [ ! -f "${DATADIR}"/habitat/LR.zip ]; then
+        curl -o "${DATADIR}"/habitat/LR.zip https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip
     fi
-    unzip -j "${DATADIR}"/food/LR.zip LR/lco/GLCSv11_02_5m.tif -d "${DATADIR}"/food/
+    unzip -j "${DATADIR}"/habitat/LR.zip LR/lco/GLCSv11_02_5m.tif -d "${DATADIR}"/habitat/
 fi
 
 # Get HYDE 3.2 data
-if [ ! -f "${DATADIR}"/food/modified_grazing2017AD.asc ]; then
-    if [ ! -f "${DATADIR}"/food/baseline.zip ]; then
-        curl -o "${DATADIR}"/food/baseline.zip "https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip"
+if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.asc ]; then
+    if [ ! -f "${DATADIR}"/habitat/baseline.zip ]; then
+        curl -o "${DATADIR}"/habitat/baseline.zip "https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip"
     fi
-    if [ ! -f "${DATADIR}"/food/2017AD_lu.zip ]; then
-        unzip -j "${DATADIR}"/food/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/food/
+    if [ ! -f "${DATADIR}"/habitat/2017AD_lu.zip ]; then
+        unzip -j "${DATADIR}"/food/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/food/
+        unzip -j "${DATADIR}"/habitat/baseline.zip baseline/zip/2017AD_lu.zip -d "${DATADIR}"/habitat/
     fi
-    if [ !
-f "${DATADIR}"/food/grazing2017AD.asc ]; then - unzip -j "${DATADIR}"/food/2017AD_lu.zip grazing2017AD.asc -d "${DATADIR}"/food/ + if [ ! -f "${DATADIR}"/habitat/grazing2017AD.asc ]; then + unzip -j "${DATADIR}"/habitat/2017AD_lu.zip grazing2017AD.asc -d "${DATADIR}"/habitat/ fi # The pixel scale in the two files have different rounding despite covering the same area # and so this makes them align. - sed "s/0.0833333/0.08333333333333333/" "${DATADIR}"/food/grazing2017AD.asc > "${DATADIR}"/food/modified_grazing2017AD.asc + sed "s/0.0833333/0.08333333333333333/" "${DATADIR}"/habitat/grazing2017AD.asc > "${DATADIR}"/habitat/modified_grazing2017AD.asc fi -if [ ! -f "${DATADIR}"/food/modified_grazing2017AD.prj ]; then - echo 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' > "${DATADIR}"/food/modified_grazing2017AD.prj +if [ ! -f "${DATADIR}"/habitat/modified_grazing2017AD.prj ]; then + echo 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]' > "${DATADIR}"/habitat/modified_grazing2017AD.prj fi -# We need rescaled versions of the current data -if [ ! -d "${DATADIR}"/food/current_layers ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/current_raw.tif \ - --scale 0.08333333333333333 \ - --output "${DATADIR}"/food/current_layers/ +if [ ! -f "${DATADIR}"/habitat/crop.tif ] || [ ! 
-f "${DATADIR}"/habitat/pasture.tif ]; then + python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/habitat/GLCSv11_02_5m.tif \ + --hyde "${DATADIR}"/habitat/modified_grazing2017AD.asc \ + --output "${DATADIR}"/habitat/ fi -# Combine GAEZ and HYDE data -if [ ! -f "${DATADIR}"/food/crop.tif ] || [ ! -f "${DATADIR}"/food/pasture.tif ]; then - python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \ - --hyde "${DATADIR}"/food/modified_grazing2017AD.asc \ - --output "${DATADIR}"/food/ -fi - -if [ ! -f "${DATADIR}"/food/crop_diff.tif ]; then - python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \ - --output "${DATADIR}"/food/crop_diff.tif -fi - -if [ ! -f "${DATADIR}"/food/pasture_diff.tif ]; then - python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \ - --output "${DATADIR}"/food/pasture_diff.tif -fi - -if [ ! -f "${DATADIR}"/food/current_raw.tif ]; then - python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \ - --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --crop_diff "${DATADIR}"/food/crop_diff.tif \ - --pasture_diff "${DATADIR}"/food/pasture_diff.tif \ - --seed 42 \ - --output "${DATADIR}"/food/current_raw.tif +if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then + python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/100m/jung_current \ + --pnv "${DATADIR}"/habitat/pnv_100m.tif \ + --crop "${DATADIR}"/habitat/crop.tif \ + --pasture "${DATADIR}"/habitat/pasture.tif \ + --output "${DATADIR}"/100m/current fi diff --git a/scripts/run.sh b/scripts/run.sh index 958bd8f..88a9fcb 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -69,7 +69,7 @@ if [ ! -f "${DATADIR}"/crosswalk.csv ]; then fi # Get habitat layer and prepare for use -if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then +if [ ! 
-f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then if [ ! -f "${DATADIR}"/habitat/jung_l2_raw.tif ]; then reclaimer zenodo --zenodo_id 4058819 \ --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ @@ -84,17 +84,31 @@ if [ ! -f "${DATADIR}"/habitat/current_raw.tif ]; then --output "${DATADIR}"/habitat/ fi - python3 ./prepare_layers/make_current_map.py --jung "${DATADIR}"/habitat/jung_l2_raw.tif \ - --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/current_raw.tif \ - -j 16 -fi + if [ ! -f "${DATADIR}"/100m/current/.sentinel ]; then + python3 ./prepare_layers/make_current_map.py --jung_l2 "${DATADIR}"/habitat/jung_l2_raw.tif \ + --update_masks "${DATADIR}"/habitat/lvl2_changemasks_ver004 \ + --crosswalk "${DATADIR}"/crosswalk.csv \ + --output "${DATADIR}"/100m/current \ + --sentinel "${DATADIR}"/100m/current/.sentinel \ + -j 16 + fi -if [ ! -d "${DATADIR}"/habitat_maps/current ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/current_raw.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/current/ + if [ ! -f "${DATADIR}"/5_arc_seconds/current/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/current + for d in "${DATADIR}"/100m/current/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/current/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/current/.sentinel + fi fi # Get PNV layer and prepare for use @@ -105,151 +119,166 @@ if [ ! -f "${DATADIR}"/habitat/pnv_raw.tif ]; then --output "${DATADIR}"/habitat/pnv_raw.tif fi -if [ ! -d "${DATADIR}"/habitat_maps/pnv ]; then +if [ ! 
-d "${DATADIR}"/5_arc_seconds/pnv ]; then aoh-habitat-process --habitat "${DATADIR}"/habitat/pnv_raw.tif \ --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/pnv/ -fi - -# Generate an area scaling map -if [ ! -f "${DATADIR}"/area-per-pixel.tif ]; then - python3 ./prepare_layers/make_area_map.py --scale "${PIXEL_SCALE}" --output "${DATADIR}"/area-per-pixel.tif + --output "${DATADIR}"/5_arc_seconds/pnv/ fi # Generate the arable scenario map if check_scenario "arable"; then - if [ ! -f "${DATADIR}"/habitat/arable.tif ]; then - python3 ./prepare_layers/make_arable_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --output "${DATADIR}"/habitat/arable.tif + if [ ! -d "${DATADIR}"/habitat/arable ]; then + python3 ./prepare_layers/make_arable_map.py --current "${DATADIR}"/100m/current \ + --output "${DATADIR}"/100m/arable fi - if [ ! -d "${DATADIR}"/habitat_maps/arable ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/arable.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/arable/ + if [ ! -f "${DATADIR}"/5_arc_seconds/arable/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/arable + for d in "${DATADIR}"/100m/arable/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/arable/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/arable/.sentinel fi if [ ! 
-f "${DATADIR}"/habitat/arable_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/arable.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ + python3 ./prepare_layers/make_diff_map_2.py --current "${DATADIR}"/5_arc_seconds/current \ + --scenario "${DATADIR}"/5_arc_seconds/arable \ --output "${DATADIR}"/habitat/arable_diff_area.tif fi fi # Generate the pasture scenario map -if check_scenario "pasture"; then - if [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then - python3 ./prepare_layers/make_pasture_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --output "${DATADIR}"/habitat/pasture.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/pasture ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/pasture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/pasture/ - fi - - if [ ! -f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/pasture.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/pasture_diff_area.tif - fi -fi +# if check_scenario "pasture"; then +# if [ ! -f "${DATADIR}"/habitat/pasture.tif ]; then +# python3 ./prepare_layers/make_pasture_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --output "${DATADIR}"/habitat/pasture.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/pasture ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/pasture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/pasture/ +# fi +# +# if [ ! 
-f "${DATADIR}"/habitat/pasture_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/pasture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/pasture_diff_area.tif +# fi +# fi # Generate the restore map if check_scenario "restore"; then - if [ ! -f "${DATADIR}"/habitat/restore.tif ]; then + if [ ! -d "${DATADIR}"/habitat/restore ]; then python3 ./prepare_layers/make_restore_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/restore ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore/ - fi - - if [ ! -f "${DATADIR}"/habitat/restore_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/restore_diff_area.tif - fi -fi - -# Generate the restore_agriculture map -if check_scenario "restore_agriculture"; then - if [ ! -f "${DATADIR}"/habitat/restore_agriculture.tif ]; then - python3 ./prepare_layers/make_restore_agriculture_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore_agriculture.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/restore_agriculture ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_agriculture.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore_agriculture/ - fi - - if [ ! 
-f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif - fi -fi - -# Generate the restore all map -if check_scenario "restore_all"; then - if [ ! -f "${DATADIR}"/habitat/restore_all.tif ]; then - python3 ./prepare_layers/make_restore_all_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --current "${DATADIR}"/habitat/current_raw.tif \ + --current "${DATADIR}"/100m/current \ --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat/restore_all.tif - fi - - if [ ! -d "${DATADIR}"/habitat_maps/restore_all ]; then - aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_all.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat_maps/restore_all/ + --output "${DATADIR}"/100m/restore fi - if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then - python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --scenario "${DATADIR}"/habitat/restore_all.tif \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/restore_all_diff_area.tif + if [ ! -f "${DATADIR}"/5_arc_seconds/restore/.sentinel ]; then + mkdir -p "${DATADIR}"/5_arc_seconds/restore + for d in "${DATADIR}"/100m/restore/*.tif; do + basename=$(basename "${d}") + gdalwarp -t_srs EPSG:4326 \ + -tr 0.016666666666667 -0.016666666666667 \ + -r average \ + -multi \ + -co COMPRESS=LZW \ + -co NUM_THREADS=16 \ + -wo NUM_THREADS=16 \ + "${d}" \ + "${DATADIR}"/5_arc_seconds/restore/"${basename}" + done + touch "${DATADIR}"/5_arc_seconds/restore/.sentinel fi -fi -# Generate urban all map -if check_scenario "urban"; then - if [ ! 
-d "${DATADIR}"/habitat_maps/urban ]; then - python3 ./prepare_layers/make_constant_habitat.py --examplar "${DATADIR}"/habitat_maps/arable/lcc_1401.tif \ - --habitat_code 14.5 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --output "${DATADIR}"/habitat_maps/urban + if [ ! -f "${DATADIR}"/100m/restore_diff_area.tif ]; then + python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/100m/current \ + --scenario "${DATADIR}"/100m/restore \ + --output "${DATADIR}"/100m/restore_diff_area.tif fi - if [ ! -f "${DATADIR}"/habitat/urban_diff_area.tif ]; then - python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ - --habitat_code 14.5 \ - --crosswalk "${DATADIR}"/crosswalk.csv \ - --area "${DATADIR}"/area-per-pixel.tif \ - --scale "${PIXEL_SCALE}" \ - --output "${DATADIR}"/habitat/urban_diff_area.tif + if [ ! -f "${DATADIR}"/5_arc_seconds/restore_diff_area.tif ]; then + # In theory we should just use gdalwarp here, but it kept running out of + # memory, so we use our own Yirgacheffe version that just does the same thing in small + # chunks. fi fi +# +# # Generate the restore_agriculture map +# if check_scenario "restore_agriculture"; then +# if [ ! -f "${DATADIR}"/habitat/restore_agriculture.tif ]; then +# python3 ./prepare_layers/make_restore_agriculture_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ +# --current "${DATADIR}"/habitat/current_raw.tif \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat/restore_agriculture.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/restore_agriculture ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_agriculture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/restore_agriculture/ +# fi +# +# if [ ! 
-f "${DATADIR}"/habitat/restore_agriculture_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/restore_agriculture.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/restore_agriculture_diff_area.tif +# fi +# fi +# +# # Generate the restore all map +# if check_scenario "restore_all"; then +# if [ ! -f "${DATADIR}"/habitat/restore_all.tif ]; then +# python3 ./prepare_layers/make_restore_all_map.py --pnv "${DATADIR}"/habitat/pnv_raw.tif \ +# --current "${DATADIR}"/habitat/current_raw.tif \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat/restore_all.tif +# fi +# +# if [ ! -d "${DATADIR}"/habitat_maps/restore_all ]; then +# aoh-habitat-process --habitat "${DATADIR}"/habitat/restore_all.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat_maps/restore_all/ +# fi +# +# if [ ! -f "${DATADIR}"/habitat/restore_all_diff_area.tif ]; then +# python3 ./prepare_layers/make_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --scenario "${DATADIR}"/habitat/restore_all.tif \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/restore_all_diff_area.tif +# fi +# fi +# +# # Generate urban all map +# if check_scenario "urban"; then +# if [ ! -d "${DATADIR}"/habitat_maps/urban ]; then +# python3 ./prepare_layers/make_constant_habitat.py --examplar "${DATADIR}"/habitat_maps/arable/lcc_1401.tif \ +# --habitat_code 14.5 \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --output "${DATADIR}"/habitat_maps/urban +# fi +# +# if [ ! -f "${DATADIR}"/habitat/urban_diff_area.tif ]; then +# python3 ./prepare_layers/make_constant_diff_map.py --current "${DATADIR}"/habitat/current_raw.tif \ +# --habitat_code 14.5 \ +# --crosswalk "${DATADIR}"/crosswalk.csv \ +# --scale "${PIXEL_SCALE}" \ +# --output "${DATADIR}"/habitat/urban_diff_area.tif +# fi +# fi # Fetch and prepare the elevation layers if [ ! 
-f "${DATADIR}"/elevation.tif ]; then @@ -289,7 +318,7 @@ python3 ./utils/persistencegenerator.py --datadir "${DATADIR}" \ --scenarios "${SCENARIOS[@]}" # Calculate all the AoHs -littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc -- --force-habitat +littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc -- --force-habitat --pixel-area # Generate validation summaries aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ --output "${DATADIR}"/aohs/current.csv diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 43edba9..a44481e 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -1,196 +1,266 @@ import numpy as np import pytest -import yirgacheffe as yg -from yirgacheffe.layers import RasterLayer -from yirgacheffe.operators import DataType -from yirgacheffe.window import Area, PixelScale - -from prepare_layers.make_food_current_map import TileInfo, process_tile - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected", [ - (42, float("nan"), float("nan"), 42), - - # Other habitat replacement - (42, 1.0, float("nan"), 1401), - (42, float("nan"), 1.0, 1402), - (42, 0.0, float("nan"), 42), - (42, float("nan"), 0.0, 42), - (42, -1.0, float("nan"), 42), - (42, float("nan"), -1.0, 42), - - # Crop replacement - (1401, 1.0, float("nan"), 1401), - (1401, float("nan"), 1.0, 1401), - (1401, 0.0, float("nan"), 1401), - (1401, float("nan"), 0.0, 1401), - (1401, -1.0, float("nan"), 31), - (1401, -1.0, 1.0, 1402), - (1401, float("nan"), -1.0, 1401), - - # Pasture replacement - (1402, 1.0, float("nan"), 1402), - (1402, 1.0, float("nan"), 1402), - (1402, float("nan"), 0.0, 1402), - (1402, 0.0, float("nan"), 1402), - (1402, float("nan"), -1.0, 31), - (1402, 1.0, -1.0, 1401), - (1402, -1.0, float("nan"), 1402), - - # Don't touch urban - (1405, 1.0, float("nan"), 1405), - (1405, float("nan"), 1.0, 
1405), - (1405, 0.0, float("nan"), 1405), - (1405, float("nan"), 0.0, 1405), - (1405, -1.0, float("nan"), 1405), - (1405, -1.0, 1.0, 1405), - (1405, float("nan"), -1.0, 1405), +from prepare_layers.make_food_current_map import balance_crop_and_pasture_differences, \ + CROP_CODE, PASTURE_CODE, remove_land_cover, add_land_cover + +@pytest.mark.parametrize( + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff" + ], + [ + (0.0, 0.0, 0.0, 0.0), + (0.8, 0.0, 0.8, 0.0), + (-0.8, 0.0, -0.8, 0.0), + (0.0, 0.8, 0.0, 0.8), + (0.0, -0.8, 0.0, -0.8), + (0.4, 0.2, 0.4, 0.2), + (-0.4, -0.2, -0.4, -0.2), + ] +) +def test_balance_not_needed( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, +) -> None: + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + {}, + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + + +def test_brokend_balance() -> None: + # Testing that we spot if we've said to remove land where there isn't any + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.zeros((10, 10)), + } + with pytest.raises(ValueError): + _ = balance_crop_and_pasture_differences( + 0.5, + -0.1, + lcc_data_map, + ) + + +@pytest.mark.parametrize( + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff", + "crop_cell_value", + "pasture_cell_value" + ], + [ + (0.25, -0.5, 0.0, -0.25, 0.25, 0.75), + ] +) +def test_simple_balance_transfer( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, + crop_cell_value: float, + pasture_cell_value: float, +) -> None: + # 0% crop, 100% pasture + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.ones((10, 10)), + } + result_crop_diff, result_pasture_diff = 
balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + lcc_data_map, + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + assert (lcc_data_map[CROP_CODE] == crop_cell_value).all() + assert (lcc_data_map[PASTURE_CODE] == pasture_cell_value).all() + + +@pytest.mark.parametrize( + [ + "initial_crop_diff", + "initial_pasture_diff", + "expected_crop_diff", + "expected_pasture_diff", + "crop_cell_value", + "pasture_cell_value" + ], + [ + (0.25, -0.5, 0.0, -0.25, 0.5, 0.5), + (0.5, -0.25, 0.25, 0.0, 0.5, 0.5), + ] +) +def test_simple_half_balance_transfer( + initial_crop_diff: float, + initial_pasture_diff: float, + expected_crop_diff: float, + expected_pasture_diff: float, + crop_cell_value: float, + pasture_cell_value: float, +) -> None: + # 0% crop, 50% pasture + lcc_data_map = { + CROP_CODE: np.zeros((10, 10)), + PASTURE_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + } + result_crop_diff, result_pasture_diff = balance_crop_and_pasture_differences( + initial_crop_diff, + initial_pasture_diff, + lcc_data_map, + ) + assert expected_crop_diff == result_crop_diff + assert expected_pasture_diff == result_pasture_diff + + expected_crop_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * crop_cell_value + expected_pasture_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * pasture_cell_value + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() + + +@pytest.mark.parametrize( + [ + "crop_diff", + "expected_crop_cell", + "expected_other_cell", + ], + [ + (-0.5, 0.5, 0.5), + ] +) +def test_remove_land_simple( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, +) -> None: + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.zeros((10, 10)), + CROP_CODE: np.ones((10, 10)), + } + pnv_data = np.full((10, 10), 1) + + remove_land_cover( + CROP_CODE, + 
crop_diff, + pnv_data, + lcc_data_map, + ) + + expected_crop_map = np.full((10, 10), expected_crop_cell) + expected_other_map = np.full((10, 10), expected_other_cell) + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() + + +@pytest.mark.parametrize( + [ + "crop_diff", + "pnv_value", + "expected_crop_cell", + "expected_other_1_cell", + "expected_other_2_cell", + ], + [ + (-0.5, 1, 0.0, 1.0, 0.0), + (-0.75, 1, 0.0, 1.0, 0.0), # too much + (-0.25, 1, 0.5, 0.5, 0.0), + (-0.5, 2, 0.0, 0.0, 1.0), + (-0.25, 2, 0.5, 0.0, 0.5), + ] +) +def test_remove_land_less_simple( + crop_diff: float, + pnv_value: int, + expected_crop_cell: float, + expected_other_1_cell: float, + expected_other_2_cell: float, +) -> None: + # 50% crop, 50% other 2, 0% other 1 + lcc_data_map = { + 1: np.zeros((10, 10)), + 2: np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float), + CROP_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + } + pnv_data = np.full((10, 10), pnv_value) + + remove_land_cover( + CROP_CODE, + crop_diff, + pnv_data, + lcc_data_map, + ) + + expected_crop_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_crop_cell + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + expected_other_1_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_other_1_cell + assert (expected_other_1_map == lcc_data_map[1]).all() + expected_other_2_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) + \ + (np.array([[i % 2] * 10 for i in range(10)]).astype(float) * expected_other_2_cell) + assert (expected_other_2_map == lcc_data_map[2]).all() + +@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ + (0.5, 0.5, 0.5), + (1.0, 1.0, 0.0), ]) -def test_process_tile_all(initial, crop_diff, pasture_diff, expected) -> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - 
datatype=DataType.Int16, - ) as current: - yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=12, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - expected = np.full((12, 10), expected, dtype=np.int16) - assert (result == expected).all() - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 - (42, float("nan"), float("nan"), 0, 0, 0), - (42, 0.5, float("nan"), 50, 0, 0), - (42, float("nan"), 0.5, 0, 50, 0), - (42, -0.5, float("nan"), 0, 0, 0), - (42, float("nan"), -0.5, 0, 0, 0), - - (1401, float("nan"), float("nan"), 100, 0, 0), - (1401, -0.5, float("nan"), 50, 0, 50), - (1401, float("nan"), -0.5, 100, 0, 0), - - (1402, float("nan"), float("nan"), 0, 100, 0), - (1402, float("nan"), -0.5, 0, 50, 50), - (1402, -0.5, float("nan"), 0, 100, 0), -]) -def test_partial_replacement( - initial : int, - crop_diff : float, - pasture_diff : float, - expected_crop_count : int, - expected_pasture_count : int, - expected_pnv_count : int, +def test_add_land_simple( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, ) -> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as current: - yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=10, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - crop_count = (result == 1401).sum() - 
assert crop_count == expected_crop_count - pasture_count = (result == 1402).sum() - assert pasture_count == expected_pasture_count - pnv_count = (result == 31).sum() - assert pnv_count == expected_pnv_count - -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 - (42, float("nan"), float("nan"), 0, 0, 0), - (42, 0.5, float("nan"), 50, 0, 0), - (42, float("nan"), 0.5, 0, 50, 0), - (42, -0.5, float("nan"), 0, 0, 0), - (42, float("nan"), -0.5, 0, 0, 0), - - (1401, float("nan"), float("nan"), 50, 0, 0), - (1401, 1.0, float("nan"), 100, 0, 0), - (1401, 0.5, float("nan"), 100, 0, 0), - (1401, 0.1, float("nan"), 60, 0, 0), - (1401, -0.1, float("nan"), 40, 0, 10), - (1401, -0.5, float("nan"), 0, 0, 50), - (1401, -1.0, float("nan"), 0, 0, 50), - (1401, float("nan"), 1.0, 50, 50, 0), - - (1405, float("nan"), float("nan"), 0, 0, 0), - (1405, 1.0, float("nan"), 50, 0, 0), - (1405, 0.5, float("nan"), 50, 0, 0), - (1405, 0.1, float("nan"), 10, 0, 0), - (1405, -0.1, float("nan"), 0, 0, 0), - (1405, -0.5, float("nan"), 0, 0, 0), - (1405, -1.0, float("nan"), 0, 0, 0), + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.ones((10, 10)), + CROP_CODE: np.zeros((10, 10)), + } + + add_land_cover( + CROP_CODE, + crop_diff, + lcc_data_map, + ) + + expected_crop_map = np.full((10, 10), expected_crop_cell) + expected_other_map = np.full((10, 10), expected_other_cell) + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() + +@pytest.mark.parametrize("crop_diff,expected_crop_cell,expected_other_cell", [ + (0.25, 0.5, 0.5), + (0.5, 1.0, 0.0), ]) -def test_partial_initial_tile( - initial : int, - crop_diff : float, - pasture_diff : float, - expected_crop_count : int, - expected_pasture_count : int, - expected_pnv_count : int, +def test_add_land_avoid_excluded( + crop_diff: float, + expected_crop_cell: float, + expected_other_cell: float, ) 
-> None: - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as current: - - # Cheating as Yirgacheffe doesn't have a neat way to provide numpy data straight to a layer - band = current._dataset.GetRasterBand(1) - vals = np.array([[initial, 22], [22, initial]]) - all_vals = np.tile(vals, (90, 180)) - band.WriteArray(all_vals, 0, 0) - - with RasterLayer.empty_raster_layer( - Area(-180, 90, 180, -90), - PixelScale(1.0, -1.0), - datatype=DataType.Int16, - ) as pnv: - yg.constant(31).save(pnv) - - test_tile = TileInfo( - x_position=0, - y_position=0, - width=10, - height=10, - crop_diff=crop_diff, - pasture_diff=pasture_diff, - ) - - result = process_tile(current, pnv, test_tile, 42) - crop_count = (result == 1401).sum() - assert crop_count == expected_crop_count - pasture_count = (result == 1402).sum() - assert pasture_count == expected_pasture_count - pnv_count = (result == 31).sum() - assert pnv_count == expected_pnv_count + # 100% crop, 0% other 1 + lcc_data_map = { + 1: np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float), + PASTURE_CODE: np.array([[i % 2] * 10 for i in range(10)]).astype(float), + CROP_CODE: np.zeros((10, 10)), + } + + add_land_cover( + CROP_CODE, + crop_diff, + lcc_data_map, + ) + + expected_crop_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) * expected_crop_cell + expected_pasture_map = np.array([[i % 2] * 10 for i in range(10)]).astype(float) # unchanged + expected_other_map = np.array([[(i + 1) % 2] * 10 for i in range(10)]).astype(float) * expected_other_cell + assert (expected_crop_map == lcc_data_map[CROP_CODE]).all() + assert (expected_pasture_map == lcc_data_map[PASTURE_CODE]).all() + assert (expected_other_map == lcc_data_map[1]).all() diff --git a/utils/binary_maps.py b/utils/binary_maps.py index f0857b5..a79ee51 100644 --- a/utils/binary_maps.py +++ b/utils/binary_maps.py @@ -1,45 +1,40 @@ import argparse import os from 
multiprocessing import cpu_count +from pathlib import Path -import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg -layers = ["all", "AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] -# layers = ["AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] +LAYERS = ["all", "AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] +# LAYERS = ["AMPHIBIANS", "AVES", "MAMMALIA", "REPTILES"] def binary_maps( - map_path: str, - output_path: str, + map_path: Path, + output_path: Path, parallelism: int ) -> None: - output_dir, filename = os.path.split(output_path) - base, _ext = os.path.splitext(filename) - os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_path.parent, exist_ok=True) - for index, layername in enumerate(layers): - with RasterLayer.layer_from_file(map_path, band=index+1) as inputmap: - with RasterLayer.empty_raster_layer_like( - inputmap, - filename=os.path.join(output_dir, f"{base}_{layername}_binary.tif"), - datatype=gdal.GDT_Int16, - ) as result: - calc = inputmap.numpy_apply(lambda c: np.where(c != 0, c / np.abs(c), 0)) - calc.parallel_save(result, parallelism=parallelism) + for index, layername in enumerate(LAYERS): + with yg.read_raster(map_path, band=index+1) as inputmap: + binary_map = yg.where(inputmap != 0, inputmap / yg.abs(inputmap), 0) + binary_map.astype(yg.DataType.Int16).to_geotiff( + output_path.parent / f"{output_path.stem}_{layername}_binary.tif", + parallelism=parallelism, + ) def main() -> None: parser = argparse.ArgumentParser(description="Converts the output maps to binary") parser.add_argument( "--input", - type=str, + type=Path, required=True, dest="input_filename", help="multilayer result geotiff" ) parser.add_argument( "--output", - type=str, + type=Path, required=True, dest="output_filename", help="Destination geotiff path." 
diff --git a/utils/footprint_of_humanity.py b/utils/footprint_of_humanity.py index 7399833..77d60a6 100644 --- a/utils/footprint_of_humanity.py +++ b/utils/footprint_of_humanity.py @@ -24,7 +24,7 @@ def absolute( merge1 = pd.merge(current_cleaned, pnv_cleaned, on=["id_no", "season"], suffixes=["_current", "_pnv"]) merge2 = pd.merge(merge1, scenario, on=["id_no", "season"], how="left", indicator=True) merged = merge2[["id_no", "season", "class_name", "aoh_total_current", "aoh_total_pnv", "aoh_total", "_merge"]].copy() - merged.aoh_total = merged.aoh_total.fillna(0) + merged["aoh_total"] = merged.aoh_total.fillna(0) merged.rename(columns={"aoh_total": "aoh_total_scenario"}, inplace=True) merged["current_persistence"] = (merged.aoh_total_current / merged.aoh_total_pnv) ** 0.25 diff --git a/utils/habitat_stats.py b/utils/habitat_stats.py index 9221cf3..0cdb49e 100644 --- a/utils/habitat_stats.py +++ b/utils/habitat_stats.py @@ -4,36 +4,33 @@ from pathlib import Path import pandas as pd -from yirgacheffe.layers import RasterLayer, UniformAreaLayer - -from osgeo import gdal -gdal.SetCacheMax(1024 * 1024 * 16) +import yirgacheffe as yg def habitat_stats( habitats_dir: Path, - area_map_path: Path, output_dir: Path, process_count: int, ) -> None: - with UniformAreaLayer.layer_from_file(area_map_path) as area_raster: - os.makedirs(output_dir, exist_ok=True) - for scenario in Path(habitats_dir).iterdir(): - res = [] - for habitat in scenario.glob("*.tif"): - # Filenames have the form "lcc_200.tif" - we want the IUCN habitat class 2 or 14.1 etc." 
- basename, _ = os.path.splitext(habitat.name) - jung_habitat_class = int(basename.split("_")[1]) - if jung_habitat_class == 0: - continue - with RasterLayer.layer_from_file(str(habitat)) as raster: - raw_area = raster.parallel_sum(parallelism=process_count) - scaled_area_calc = raster * area_raster - scaled_area = scaled_area_calc.parallel_sum(parallelism=process_count) + os.makedirs(output_dir, exist_ok=True) + for scenario in habitats_dir.iterdir(): + res = [] + for habitat in scenario.glob("*.tif"): + # Filenames have the form "lcc_200.tif" - we want the IUCN habitat class 2 or 14.1 etc." + jung_habitat_class = int(habitat.stem.split("_")[1]) + if jung_habitat_class == 0: + continue + with ( + yg.read_raster(habitat) as raster, + yg.area_raster(raster.map_projection) as area_raster, + ): + raw_area = raster.parallel_sum(parallelism=process_count) + scaled_area_calc = raster * area_raster + scaled_area = scaled_area_calc.parallel_sum(parallelism=process_count) - res.append([jung_habitat_class, raw_area, scaled_area]) - df = pd.DataFrame(res, columns=["habitat", "pixel area", "geo area"]) - df = df.sort_values("habitat") - df.to_csv(output_dir / f"{scenario.name}.csv", index=False) + res.append([jung_habitat_class, raw_area, scaled_area]) + df = pd.DataFrame(res, columns=["habitat", "pixel area", "geo area"]) + df = df.sort_values("habitat") + df.to_csv(output_dir / f"{scenario.name}.csv", index=False) def main() -> None: parser = argparse.ArgumentParser(description="Generate stats on the habitat makeup of each scenario") @@ -43,12 +40,6 @@ def main() -> None: required=True, dest="habitats_dir", ) - parser.add_argument( - "--area", - type=Path, - required=True, - dest="area_map_path", - ) parser.add_argument( "--output", type=Path, @@ -68,7 +59,6 @@ def main() -> None: habitat_stats( args.habitats_dir, - args.area_map_path, args.output_dir, args.process_count, ) diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index a709246..b95bf43 
100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -24,11 +24,12 @@ def species_generator( res = [] for taxa in taxas: taxa_path = species_info_dir / taxa / "current" - speciess = list(taxa_path.glob("*.geojson")) + speciess = [x.stem.split('_') for x in taxa_path.glob("*.geojson")] for scenario in scenarios: - for species in speciess: + for taxid, season in speciess: res.append([ - species, + taxid, + season, aohs_path / "current" / taxa, aohs_path / scenario / taxa, aohs_path / "pnv" / taxa, @@ -37,7 +38,8 @@ def species_generator( ]) df = pd.DataFrame(res, columns=[ - '--speciesdata', + '--taxid', + '--season', '--current_path', '--scenario_path', '--historic_path', diff --git a/utils/raster_diff.py b/utils/raster_diff.py index d61055a..6c52f9b 100644 --- a/utils/raster_diff.py +++ b/utils/raster_diff.py @@ -3,6 +3,7 @@ from pathlib import Path import yirgacheffe as yg +from snakemake_argparse_bridge import snakemake_compatible # type: ignore def raster_diff( raster_a_path: Path, @@ -10,12 +11,18 @@ def raster_diff( output_path: Path, ) -> None: os.makedirs(output_path.parent, exist_ok=True) + with ( + yg.read_raster(raster_a_path) as raster_a, + yg.read_raster(raster_b_path) as raster_b + ): + result = raster_a - raster_b + result.to_geotiff(output_path, parallelism=True) - with yg.read_raster(raster_a_path) as raster_a: - with yg.read_raster(raster_b_path) as raster_b: - result = raster_a - raster_b - result.to_geotiff(output_path, parallelism=True) - +@snakemake_compatible(mapping={ + "raster_a_path": "input.raster_a", + "raster_b_path": "input.raster_b", + "output_path": "output.diff", +}) def main() -> None: parser = argparse.ArgumentParser(description="Calculates the difference between two rasters") parser.add_argument( @@ -30,13 +37,13 @@ def main() -> None: type=Path, required=True, dest="raster_b_path", - help="Right hands side of the difference" + help="Right hand side of the difference" ) parser.add_argument( "--output", 
type=Path, required=True, - dest="output_filename", + dest="output_path", help="Destination geotiff file for results." ) args = parser.parse_args() @@ -44,7 +51,7 @@ def main() -> None: raster_diff( args.raster_a_path, args.raster_b_path, - args.output_filename, + args.output_path, ) if __name__ == "__main__": diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 40b0592..2b57ef6 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -1,117 +1,29 @@ import argparse -import os -import queue import resource -import sys -import tempfile -import time from pathlib import Path -from multiprocessing import Manager, Process, Queue, cpu_count -from typing import Optional -from yirgacheffe.layers import RasterLayer # type: ignore -from yirgacheffe.operators import DataType - -def worker( - compress: bool, - filename: str, - result_dir: Path, - input_queue: Queue, -) -> None: - output_tif = result_dir / filename - - merged_result: Optional[RasterLayer] = None - - while True: - try: - path: Path = input_queue.get_nowait() - except queue.Empty: - break - - with RasterLayer.layer_from_file(path) as partial_raster: - if merged_result is None: - merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=DataType.Float64) - cleaned_raster = partial_raster.nan_to_num(nan=0.0) - cleaned_raster.save(merged_result) - else: - calc = merged_result + partial_raster.nan_to_num(nan=0.0) - temp = RasterLayer.empty_raster_layer_like(calc, datatype=DataType.Float64) - calc.save(temp) - merged_result = temp - - if merged_result: - with RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif, compress=compress) as final: - merged_result.save(final) - input_queue.put(output_tif) +import yirgacheffe as yg +from alive_progress import alive_bar # type: ignore +from snakemake_argparse_bridge import snakemake_compatible # type: ignore def raster_sum( images_dir: Path, output_filename: Path, - processes_count: int ) -> None: - print(f"process count set to 
{processes_count}") - + # We'll be opening all the deltap files per taxa in one, so we'll need to raise + # the number of files we can open. _, max_fd_limit = resource.getrlimit(resource.RLIMIT_NOFILE) resource.setrlimit(resource.RLIMIT_NOFILE, (max_fd_limit, max_fd_limit)) - print(f"Set fd limit to {max_fd_limit}") - - os.makedirs(output_filename.parent, exist_ok=True) - - files = list(images_dir.glob("*.tif")) - if not files: - sys.exit(f"No files in {images_dir}, aborting") - print(f"Found {len(files)} images to process") - - with tempfile.TemporaryDirectory() as tempdir: - with Manager() as manager: - source_queue = manager.Queue() - - for file in files: - source_queue.put(file) - - workers = [Process(target=worker, args=( - False, - f"{index}.tif", - Path(tempdir), - source_queue - )) for index in range(processes_count)] - for worker_process in workers: - worker_process.start() - - processes = workers - while processes: - candidates = [x for x in processes if not x.is_alive()] - for candidate in candidates: - candidate.join() - if candidate.exitcode: - for victim in processes: - victim.kill() - sys.exit(candidate.exitcode) - processes.remove(candidate) - time.sleep(0.1) - - # here we should have now a set of images in tempdir to merge - single_worker = Process(target=worker, args=( - True, - output_filename.name, - output_filename.parent, - source_queue - )) - single_worker.start() - - processes = [single_worker] - while processes: - candidates = [x for x in processes if not x.is_alive()] - for candidate in candidates: - candidate.join() - if candidate.exitcode: - for victim in processes: - victim.kill() - sys.exit(candidate.exitcode) - processes.remove(candidate) - time.sleep(1) + layers = [yg.read_raster(x) for x in images_dir.glob("*.tif")] + total = yg.sum(layers) + with alive_bar(manual=True) as bar: + total.to_geotiff(output_filename, callback=bar, parallelism=True) +@snakemake_compatible(mapping={ + "rasters_directory": "input.rasters", + 
"output_filename": "output[0]", +}) def main() -> None: parser = argparse.ArgumentParser(description="Sums many rasters into a single raster") parser.add_argument( @@ -128,20 +40,11 @@ def main() -> None: dest="output_filename", help="Destination geotiff file for results." ) - parser.add_argument( - "-j", - type=int, - required=False, - default=round(cpu_count() / 2), - dest="processes_count", - help="Number of concurrent threads to use." - ) args = parser.parse_args() raster_sum( args.rasters_directory, args.output_filename, - args.processes_count ) if __name__ == "__main__": diff --git a/utils/regression_plot.py b/utils/regression_plot.py index 48894f6..373b0d5 100644 --- a/utils/regression_plot.py +++ b/utils/regression_plot.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import numpy as np -from yirgacheffe.layers import RasterLayer +import yirgacheffe as yg def filter_data(chunks): a_chunk, b_chunk = chunks @@ -30,17 +30,19 @@ def regression_plot( output_dir, _ = os.path.split(output_path) os.makedirs(output_dir, exist_ok=True) - with RasterLayer.layer_from_file(a_path) as a_layer: - with RasterLayer.layer_from_file(b_path) as b_layer: - if a_layer.pixel_scale != b_layer.pixel_scale: - sys.exit("GeoTIFFs have different pixel scale") - if a_layer.area != b_layer.area: - sys.exit("GeoTIFFs have different spatial coordinates") - if a_layer.window != b_layer.window: - sys.exit("GeoTIFFs have different pixel dimensions") + with ( + yg.read_raster(a_path) as a_layer, + yg.read_raster(b_path) as b_layer, + ): + if a_layer.pixel_scale != b_layer.pixel_scale: + sys.exit("GeoTIFFs have different pixel scale") + if a_layer.area != b_layer.area: + sys.exit("GeoTIFFs have different spatial coordinates") + if a_layer.window != b_layer.window: + sys.exit("GeoTIFFs have different pixel dimensions") - a_pixels = a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) - b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) + a_pixels = 
a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) + b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) with Pool(processes=cpu_count() // 2) as pool: filtered_chunk_pairs = pool.map(filter_data, zip(a_pixels, b_pixels)) diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index 85e2808..ccd538d 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -41,17 +41,15 @@ def species_generator( habitat_maps_path, data_dir / "elevation-max.tif", data_dir / "elevation-min.tif", - data_dir / "area-per-pixel.tif", data_dir / "crosswalk.csv", species, aohs_path / scenario / taxa, ]) df = pd.DataFrame(res, columns=[ - '--habitats', + '--fractional_habitats', '--elevation-max', '--elevation-min', - '--area', '--crosswalk', '--speciesdata', '--output' diff --git a/workflow/Snakefile b/workflow/Snakefile new file mode 100644 index 0000000..c7e72ad --- /dev/null +++ b/workflow/Snakefile @@ -0,0 +1,59 @@ +# LIFE Pipeline - Snakemake Workflow +# =================================== +# +# This workflow implements the LIFE biodivesity metric methodology. 
+# It processes species data, generates Area of Habitat (AoH) rasters, and +# produces the different scenario output rasters +# +# Usage: +# snakemake --cores N all +# snakemake --cores N validation +# snakemake --cores N life +# +# Environment variables required: +# DATADIR - Directory for input/output data +# DB_HOST, DB_NAME, DB_USER, DB_PASSWORD - PostgreSQL credentials +# +# For GBIF validation (optional): +# GBIF_USERNAME, GBIF_EMAIL, GBIF_PASSWORD + +import os +from pathlib import Path + +# Allow GDAL/OGR to handle large GeoJSON objects +os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" + +# Source directory (where this repo lives) +SRCDIR = Path(workflow.basedir).parent + + +# Load configuration +configfile: SRCDIR / "config/config.yaml" + + +# Data directory from environment variable +DATADIR = Path(os.environ.get("DATADIR", "/data")) + +# Taxa list from config +TAXA = config["taxa"] + + +# Include rule modules +include: "rules/prepare.smk" +include: "rules/habitat.smk" +include: "rules/aoh.smk" +include: "rules/scenario.smk" +include: "rules/species.smk" + + + +rule species_data: +    """ +    Target: extract species data from PostgreSQL. +    """ +    input: +        DATADIR / "species-info" / "AVES" / ".birdlife_applied", +        expand( +            str(DATADIR / "species-info" / "{taxa}" / SCENARIO / "report.csv"), +            taxa=TAXA, +        ), diff --git a/workflow/rules/habitat.smk b/workflow/rules/habitat.smk new file mode 100644 index 0000000..35d7f49 --- /dev/null +++ b/workflow/rules/habitat.smk @@ -0,0 +1,179 @@ +# LIFE Pipeline - Updated habitat map rules +# ========================================== +# +# These rules handle combining the Jung habitat map with +# farming data from GAEZ and Hyde to generate a more suitable +# base map. + + +# ============================================================================= +# GAEZ data +# ============================================================================= +rule gaez_download: + """ + Fetch the compressed GAEZ data.
+ """ + output: + archive=DATADIR / "food" / "LR.zip" + params: + url="https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/LR.zip", + log: + DATADIR / "logs" / "download_gaez.log", + shell: + """ + curl -o {output.archive} \ + {params.url} \ + 2>&1 | tee {log} + """ + + +rule gaez_expand: + """ + Decompress the GAEZ download. + """ + output: + gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif" + params: + filepath="LR/lco/GLCSv11_02_5m.tif" + input: + archive=DATADIR / "food" / "LR.zip" + log: + DATADIR / "logs" / "expand_gaez.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.gaez_raster}) \ + 2>&1 | tee {log} + """ + + +# ============================================================================= +# Hyde data +# ============================================================================= +rule hyde_download: + """ + Fetch the compressed Hyde data. + """ + output: + archive=DATADIR / "food" / "baseline.zip" + params: + url="https://geo.public.data.uu.nl/vault-hyde/HYDE%203.2%5B1710494848%5D/original/baseline.zip", + log: + DATADIR / "logs" / "download_hyde.log", + shell: + """ + curl -o {output.archive} \ + {params.url} \ + 2>&1 | tee {log} + """ + + +rule hyde_expand_land_usage_archive: + """ + Extract the inner land usage archive. + """ + output: + inner_hyde=DATADIR / "food" / "2017AD_lu.zip" + params: + filepath="baseline/zip/2017AD_lu.zip" + input: + archive=DATADIR / "food" / "baseline.zip" + log: + DATADIR / "logs" / "expand_hyde_1.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.inner_hyde}) \ + 2>&1 | tee {log} + """ + + +rule hyde_expand_land_usage_raster: + """ + Extract raster from the inner land usage archive.
+ """ + output: + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + params: + filepath="grazing2017AD.asc" + input: + archive=DATADIR / "food" / "2017AD_lu.zip" + log: + DATADIR / "logs" / "expand_hyde_2.log", + shell: + """ + unzip -j {input.archive} \ + {params.filepath} \ + -d $(dirname {output.raw_hyde_raster}) \ + 2>&1 | tee {log} + """ + + +rule modify_hyde_pixel_scale: + """ + Modify the pixel scale value in the Hyde data as it isn't precise enough to match the GAEZ data. + """ + output: + modified_hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc" + input: + raw_hyde_raster=DATADIR / "food" / "grazing2017AD.asc" + shell: + """ + sed "s/0.0833333/0.08333333333333333/" {input.raw_hyde_raster} > {output.modified_hyde_raster} + """ + + +rule add_hyde_projection_info: + """ + The Hyde data ships without a projection specified, so we need to add one so that + the rest of the workflow doesn't complain when we try to mix it in with other projected + raster data. + """ + output: + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj" + shell: + """ + echo '{config[hyde_projection]}' > {output.hyde_projection_file} + """ + + +# ============================================================================= +# Combine GAEZ/Hyde data +# ============================================================================= +rule combine_gaez_hyde: + """ + Combine the GAEZ and Hyde data, adjusting for overflow in cells.
+ """ + input: + hyde_projection_file=DATADIR / "food" / "modified_grazing2017AD.prj", + hyde_raster=DATADIR / "food" / "modified_grazing2017AD.asc", + gaez_raster=DATADIR / "food" / "GLCSv11_02_5m.tif", + output: + crop=DATADIR / "food" / "crop.tif", + pasture=DATADIR / "food" / "pasture.tif" + params: + output_dir=DATADIR / "food" + script: + str(SRCDIR / "prepare_layers" / "build_gaez_hyde.py") + + +# ============================================================================= +# Combine Jung, Hyde, and GAEZ +# ============================================================================= + +rule build_food_map: + """ + Pull together all the pieces into a new L1 habitat map at the original Jung scale. + """ + input: + crop=DATADIR / "food" / "crop.tif", + pasture=DATADIR / "food" / "pasture.tif", + pnv=DATADIR / "habitat" / "pnv_raw.tif", + jung=DATADIR / "habitat" / "current_raw.tif", + output: + rasters=DATADIR / "food" / "current_raw", + threads: workflow.cores + script: + str(SRCDIR / "prepare_layers" / "make_food_current_map.py") diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk new file mode 100644 index 0000000..e389d27 --- /dev/null +++ b/workflow/rules/prepare.smk @@ -0,0 +1,106 @@ +# LIFE Pipeline - Prepare base layers +# ===================================== + + +# ============================================================================= +# Crosswalk Table +# ============================================================================= + +rule convert_crosswalk: + """ + Generate a crosswalk between IUCN Habitat classes and Jung raster pixel values.
+ """ + output: + crosswalk=DATADIR / "crosswalk.csv", + script: + str(SRCDIR / "prepare_layers" / "generate_crosswalk.py") + + +# ============================================================================= +# Jung L2 habitat map +# ============================================================================= + +rule jung_habitat_map: + """ + Fetch the Jung habitat map used as the current map. + """ + output: + habitat=DATADIR / "habitat" / "jung_l2_raw.tif" + params: + zenodo_id=config["zenodo"]["jung_habitat"]["zenodo_id"], + filename=config["zenodo"]["jung_habitat"]["filename"], + log: + DATADIR / "logs" / "download_habitat.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} + """ + +rule jung_habitat_updates: + """ + Fetch the Jung habitat map updates. + """ + output: + sentinel=DATADIR / "habitat" / ".downloaded_updates" + params: + habitat_dir=DATADIR / "habitat", + zenodo_id=config["zenodo"]["jung_habitat_updates"]["zenodo_id"], + filename=config["zenodo"]["jung_habitat_updates"]["filename"], + log: + DATADIR / "logs" / "download_habitat_update.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {params.habitat_dir} \ + 2>&1 | tee {log} + + touch {output.sentinel} + """ + +rule current_raws: + """ + Build the LIFE current map, which is Jung with updates applied + and restricted to L1 to match the PNV map restrictions.
+ """ + input: + updates_sentinel=DATADIR / "habitat" / ".downloaded_updates", + habitat=DATADIR / "habitat" / "jung_l2_raw.tif", + crosswalk=DATADIR / "crosswalk.csv", + params: + updates_dir=DATADIR / "habitat" / "lvl2_changemasks_ver004", + output_dir=DATADIR / "habitat" / "current_raw", + output: + sentinel=DATADIR / "habitat" / "current_raw" / ".current_raw" + threads: workflow.cores + script: + str(SRCDIR / "prepare_layers" / "make_current_map.py") + +# ============================================================================= +# Jung PNV map +# ============================================================================= + +rule jung_pnv_map: + """ + Fetch the Jung PNV map from zenodo. + """ + output: + habitat=DATADIR / "habitat" / "pnv_raw.tif" + params: + zenodo_id=config["zenodo"]["jung_pnv"]["zenodo_id"], + filename=config["zenodo"]["jung_pnv"]["filename"], + log: + DATADIR / "logs" / "download_pnv.log", + shell: + """ + reclaimer zenodo --zenodo_id {params.zenodo_id} \ + --filename "{params.filename}" \ + --extract \ + --output {output.habitat} \ + 2>&1 | tee {log} + """