diff --git a/.mypy.ini b/.mypy.ini index 1215375..1396940 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -1,2 +1,3 @@ [mypy] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True +no_namespace_packages = True \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index 56e254c..2136ddc 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,3 +1,6 @@ +[MAIN] +init-hook='import sys; from pathlib import Path; script_dir = Path(".").resolve(); sys.path.extend([str(script_dir / "prepare_layers"), str(script_dir / "prepare_species")])' + [FORMAT] max-line-length=120 diff --git a/prepare_layers/make_food_current_map.py b/prepare_layers/make_food_current_map.py index 91ec634..22037fe 100644 --- a/prepare_layers/make_food_current_map.py +++ b/prepare_layers/make_food_current_map.py @@ -7,7 +7,7 @@ from pathlib import Path from multiprocessing import Manager, Process, cpu_count from queue import Queue -from typing import List, NamedTuple, Optional, Tuple +from typing import NamedTuple import numpy as np import yirgacheffe as yg @@ -32,8 +32,11 @@ def process_tile( current: yg.layers.RasterLayer, pnv: yg.layers.RasterLayer, tile: TileInfo, + random_seed: int, ) -> np.ndarray: + rng = np.random.default_rng(random_seed) + data = current.read_array(tile.x_position, tile.y_position, tile.width, tile.height) diffs = [ @@ -61,7 +64,7 @@ def process_tile( continue required_points = min(required_points, possible_points) - selected_locations = np.random.choice( + selected_locations = rng.choice( len(valid_locations[0]), size=required_points, replace=False @@ -90,13 +93,14 @@ def process_tile_concurrently( with yg.read_raster(current_lvl1_path) as current: with yg.read_raster(pnv_path) as pnv: while True: - tile: Optional[TileInfo] = input_queue.get() - if tile is None: + job : tuple[TileInfo, int] | None = input_queue.get() + if job is None: break + tile, seed = job if np.isnan(tile.crop_diff) and np.isnan(tile.pasture_diff): result_queue.put((tile, None)) else: - data = process_tile(current, pnv, tile) + data = process_tile(current, pnv, tile, seed) result_queue.put((tile, data.tobytes())) result_queue.put(None) @@ -105,7 +109,7 @@ def build_tile_list( current_lvl1_path: Path, crop_adjustment_path: Path, pasture_adjustment_path: Path, -) -> List[TileInfo]: +) -> list[TileInfo]: tiles = [] with yg.read_raster(current_lvl1_path) as current: current_dimensions = current.window.xsize, current.window.ysize @@ -151,7 +155,7 @@ def assemble_map( band = output._dataset.GetRasterBand(1) # pylint: disable=W0212 while True: - result : Optional[Tuple[TileInfo,Optional[bytearray]]] = result_queue.get() + result : tuple[TileInfo,bytearray | None] | None = result_queue.get() if result is None: sentinal_count -= 1 if sentinal_count == 0: @@ -174,14 +178,18 @@ def pipeline_source( pasture_adjustment_path: Path, source_queue: Queue, sentinal_count: int, + random_seed: int, ) -> None: + rng = np.random.default_rng(random_seed) + tiles = build_tile_list( current_lvl1_path, crop_adjustment_path, pasture_adjustment_path, ) - for tile in tiles: - source_queue.put(tile) + seeds = rng.integers(2**63, size=len(tiles)) + for tile, seed in zip(tiles, seeds): + source_queue.put((tile, seed)) for _ in range(sentinal_count): source_queue.put(None) @@ -190,6 +198,7 @@ def make_food_current_map( pnv_path: Path, crop_adjustment_path: Path, pasture_adjustment_path: Path, + random_seed: int, output_path: Path, processes_count: int, ) -> None: @@ -210,7 +219,7 @@ def make_food_current_map( pnv_path, source_queue, result_queue, - )) for index in range(processes_count)] + )) for _ in range(processes_count)] for worker_process in workers: worker_process.start() @@ -220,6 +229,7 @@ def make_food_current_map( pasture_adjustment_path, source_queue, processes_count, + random_seed, )) source_worker.start() @@ -273,6 +283,13 @@ def main() -> None: help="Path of adjustment for pasture diff", dest="pasture_adjustment_path", ) + parser.add_argument( + "--seed", + type=int, + required=True, + help="Seed the random number generator", + dest="seed", + ) parser.add_argument( '--output', type=Path, @@ -295,6 +312,7 @@ def main() -> None: args.pnv_path, args.crop_adjustment_path, args.pasture_adjustment_path, + args.seed, args.output_path, args.processes_count, ) diff --git a/prepare_species/common.py b/prepare_species/common.py index 6a60a56..ee357e9 100644 --- a/prepare_species/common.py +++ b/prepare_species/common.py @@ -20,6 +20,7 @@ COLUMNS = [ "id_no", "assessment_id", + "assessment_year", "season", "elevation_lower", "elevation_upper", @@ -170,7 +171,7 @@ def process_habitats( def process_geometries( - geometries_data: List[Tuple[int,shapely.Geometry]], + geometries_data: List[Tuple[int,str]], report: SpeciesReport, ) -> Dict[int,shapely.Geometry]: if len(geometries_data) == 0: @@ -226,7 +227,8 @@ def process_and_save( output_directory_path: Path, ) -> None: - id_no, assessment_id, elevation_lower, elevation_upper, scientific_name, family_name, threat_code = row + id_no, assessment_id, assessment_year, elevation_lower, elevation_upper, scientific_name, \ + family_name, threat_code = row seasons = set(geometries.keys()) | set(habitats.keys()) @@ -237,6 +239,7 @@ def process_and_save( [[ id_no, assessment_id, + int(assessment_year), SEASON_NAME[1], int(elevation_lower) if elevation_lower is not None else None, int(elevation_upper) if elevation_upper is not None else None, @@ -299,6 +302,7 @@ def process_and_save( [[ id_no, assessment_id, + int(assessment_year), SEASON_NAME[2], int(elevation_lower) if elevation_lower is not None else None, int(elevation_upper) if elevation_upper is not None else None, @@ -318,6 +322,7 @@ def process_and_save( [[ id_no, assessment_id, + int(assessment_year), SEASON_NAME[3], int(elevation_lower) if elevation_lower is not None else None, int(elevation_upper) if elevation_upper is not None else None, diff --git a/prepare_species/extract_species_psql.py b/prepare_species/extract_species_psql.py index d8fd55f..dd45248 100644 --- a/prepare_species/extract_species_psql.py +++ b/prepare_species/extract_species_psql.py @@ -22,6 +22,7 @@ SELECT assessments.sis_taxon_id as id_no, assessments.id as assessment_id, + DATE_PART('year', assessments.assessment_date) as assessment_year, (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper, taxons.scientific_name, @@ -110,7 +111,16 @@ def process_row( register(connection) cursor = connection.cursor() - (id_no, assessment_id, _elevation_lower, _elevation_upper, scientific_name, _family_name, _threat_code) = row + ( + id_no, + assessment_id, + _assessment_year, + _elevation_lower, + _elevation_upper, + scientific_name, + _family_name, + _threat_code, + ) = row report = SpeciesReport(id_no, assessment_id, scientific_name) if id_no in overrides: report.overriden = True diff --git a/scripts/generate_food_map.sh b/scripts/generate_food_map.sh old mode 100644 new mode 100755 index 0398c20..9d93a88 --- a/scripts/generate_food_map.sh +++ b/scripts/generate_food_map.sh @@ -6,6 +6,7 @@ # downloaded and generated `current_raw.tif` from the original LIFE pipeline (see run.sh) set -e +set -x if [ -z "${DATADIR}" ]; then echo "Please specify $DATADIR" @@ -67,20 +68,29 @@ if [ ! -d "${DATADIR}"/food/current_layers ]; then fi # Combine GAEZ and HYDE data -python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \ - --hyde "${DATADIR}"/food/modified_grazing2017AD.asc \ - --output "${DATADIR}"/food/ +if [ ! -f "${DATADIR}"/food/crop.tif ] || [ ! -f "${DATADIR}"/food/pasture.tif ]; then + python3 ./prepare_layers/build_gaez_hyde.py --gaez "${DATADIR}"/food/GLCSv11_02_5m.tif \ + --hyde "${DATADIR}"/food/modified_grazing2017AD.asc \ + --output "${DATADIR}"/food/ +fi -python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \ - --output "${DATADIR}"/food/crop_diff.tif +if [ ! -f "${DATADIR}"/food/crop_diff.tif ]; then + python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/crop.tif \ + --raster_b "${DATADIR}"/food/current_layers/lcc_1401.tif \ + --output "${DATADIR}"/food/crop_diff.tif +fi -python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \ - --raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \ - --output "${DATADIR}"/food/pasture_diff.tif +if [ ! -f "${DATADIR}"/food/pasture_diff.tif ]; then + python3 ./utils/raster_diff.py --raster_a "${DATADIR}"/food/pasture.tif \ + --raster_b "${DATADIR}"/food/current_layers/lcc_1402.tif \ + --output "${DATADIR}"/food/pasture_diff.tif +fi -python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \ - --pnv "${DATADIR}"/habitat/pnv_raw.tif \ - --crop_diff "${DATADIR}"/food/crop_diff.tif \ - --pasture_diff "${DATADIR}"/food/pasture_diff.tif \ - --output "${DATADIR}"/food/current_raw.tif +if [ ! -f "${DATADIR}"/food/current_raw.tif ]; then + python3 ./prepare_layers/make_food_current_map.py --current_lvl1 "${DATADIR}"/habitat/current_raw.tif \ + --pnv "${DATADIR}"/habitat/pnv_raw.tif \ + --crop_diff "${DATADIR}"/food/crop_diff.tif \ + --pasture_diff "${DATADIR}"/food/pasture_diff.tif \ + --seed 42 \ + --output "${DATADIR}"/food/current_raw.tif +fi diff --git a/tests/test_food_layer.py b/tests/test_food_layer.py index 96f5e0e..43edba9 100644 --- a/tests/test_food_layer.py +++ b/tests/test_food_layer.py @@ -48,9 +48,17 @@ ]) def test_process_tile_all(initial, crop_diff, pasture_diff, expected) -> None: - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as current: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as current: yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as pnv: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as pnv: yg.constant(31).save(pnv) test_tile = TileInfo( @@ -62,11 +70,11 @@ def test_process_tile_all(initial, crop_diff, pasture_diff, expected) -> None: pasture_diff=pasture_diff, ) - result = process_tile(current, pnv, test_tile) + result = process_tile(current, pnv, test_tile, 42) expected = np.full((12, 10), expected, dtype=np.int16) assert (result == expected).all() -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ +@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 (42, float("nan"), float("nan"), 0, 0, 0), (42, 0.5, float("nan"), 50, 0, 0), (42, float("nan"), 0.5, 0, 50, 0), @@ -81,10 +89,25 @@ def test_process_tile_all(initial, crop_diff, pasture_diff, expected) -> None: (1402, float("nan"), -0.5, 0, 50, 50), (1402, -0.5, float("nan"), 0, 100, 0), ]) -def test_partial_replacement(initial, crop_diff, pasture_diff, expected_crop_count, expected_pasture_count, expected_pnv_count) -> None: - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as current: +def test_partial_replacement( + initial : int, + crop_diff : float, + pasture_diff : float, + expected_crop_count : int, + expected_pasture_count : int, + expected_pnv_count : int, +) -> None: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as current: yg.constant(initial).save(current) - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as pnv: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as pnv: yg.constant(31).save(pnv) test_tile = TileInfo( @@ -96,7 +119,7 @@ def test_partial_replacement(initial, crop_diff, pasture_diff, expected_crop_cou pasture_diff=pasture_diff, ) - result = process_tile(current, pnv, test_tile) + result = process_tile(current, pnv, test_tile, 42) crop_count = (result == 1401).sum() assert crop_count == expected_crop_count pasture_count = (result == 1402).sum() @@ -104,7 +127,7 @@ def test_partial_replacement(initial, crop_diff, pasture_diff, expected_crop_cou pnv_count = (result == 31).sum() assert pnv_count == expected_pnv_count -@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ +@pytest.mark.parametrize("initial,crop_diff,pasture_diff,expected_crop_count,expected_pasture_count,expected_pnv_count", [ # pylint:disable=C0301 (42, float("nan"), float("nan"), 0, 0, 0), (42, 0.5, float("nan"), 50, 0, 0), (42, float("nan"), 0.5, 0, 50, 0), @@ -128,8 +151,19 @@ def test_partial_replacement(initial, crop_diff, pasture_diff, expected_crop_cou (1405, -0.5, float("nan"), 0, 0, 0), (1405, -1.0, float("nan"), 0, 0, 0), ]) -def test_partial_initial_tile(initial, crop_diff, pasture_diff, expected_crop_count, expected_pasture_count, expected_pnv_count) -> None: - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as current: +def test_partial_initial_tile( + initial : int, + crop_diff : float, + pasture_diff : float, + expected_crop_count : int, + expected_pasture_count : int, + expected_pnv_count : int, +) -> None: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as current: # Cheating as Yirgacheffe doesn't have a neat way to provide numpy data straight to a layer band = current._dataset.GetRasterBand(1) @@ -137,7 +171,11 @@ def test_partial_initial_tile(initial, crop_diff, pasture_diff, expected_crop_co all_vals = np.tile(vals, (90, 180)) band.WriteArray(all_vals, 0, 0) - with RasterLayer.empty_raster_layer(Area(-180, 90, 180, -90), PixelScale(1.0, -1.0), datatype=DataType.Int16) as pnv: + with RasterLayer.empty_raster_layer( + Area(-180, 90, 180, -90), + PixelScale(1.0, -1.0), + datatype=DataType.Int16, + ) as pnv: yg.constant(31).save(pnv) test_tile = TileInfo( @@ -149,7 +187,7 @@ def test_partial_initial_tile(initial, crop_diff, pasture_diff, expected_crop_co pasture_diff=pasture_diff, ) - result = process_tile(current, pnv, test_tile) + result = process_tile(current, pnv, test_tile, 42) crop_count = (result == 1401).sum() assert crop_count == expected_crop_count pasture_count = (result == 1402).sum() diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index d8464c1..a709246 100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -7,6 +7,7 @@ def species_generator( data_dir: Path, + aohs_path: Path | None, curve: str, output_csv_path: Path, scenarios: List[str], @@ -14,6 +15,9 @@ def species_generator( species_info_dir = data_dir / "species-info" taxas = [x.name for x in species_info_dir.iterdir()] + if aohs_path is None: + aohs_path = data_dir / "aohs" + if curve not in ["0.1", "0.25", "0.5", "1.0", "gompertz"]: sys.exit(f'curve {curve} not in expected set of values: ["0.1", "0.25", "0.5", "1.0", "gompertz"]') @@ -25,9 +29,9 @@ def species_generator( for species in speciess: res.append([ species, - data_dir / "aohs" / "current" / taxa, - data_dir / "aohs" / scenario / taxa, - data_dir / "aohs" / "pnv" / taxa, + aohs_path / "current" / taxa, + aohs_path / scenario / taxa, + aohs_path / "pnv" / taxa, curve, data_dir / "deltap" / scenario / curve / taxa, ]) @@ -52,6 +56,13 @@ def main() -> None: required=True, dest="data_dir", ) + parser.add_argument( + '--aohs_path', + type=Path, + help="Path to find AOHs in", + required=False, + dest="aohs_path", + ) parser.add_argument( '--curve', type=str, @@ -77,7 +88,13 @@ def main() -> None: ) args = parser.parse_args() - species_generator(args.data_dir, args.curve, args.output, args.scenarios) + species_generator( + args.data_dir, + args.aohs_path, + args.curve, + args.output, + args.scenarios, + ) if __name__ == "__main__": main() diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index a6cec90..85e2808 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -11,14 +11,22 @@ def species_generator( data_dir: Path, output_csv_path: Path, scenarios: List[str], + habitats_path: Path | None, + aohs_path: Path | None, ): species_info_dir = data_dir / "species-info" taxas = [x.name for x in species_info_dir.iterdir()] + if habitats_path is None: + habitats_path = data_dir / "habitat_maps" + + if aohs_path is None: + aohs_path = data_dir / "aohs" + res = [] for taxa in taxas: for scenario in scenarios: - habitat_maps_path = data_dir / "habitat_maps" / scenario + habitat_maps_path = habitats_path / scenario if not habitat_maps_path.exists(): sys.exit(f"Expected to find habitat maps in {habitat_maps_path}") @@ -36,7 +44,7 @@ def species_generator( data_dir / "area-per-pixel.tif", data_dir / "crosswalk.csv", species, - data_dir / "aohs" / scenario / taxa, + aohs_path / scenario / taxa, ]) df = pd.DataFrame(res, columns=[ @@ -74,9 +82,29 @@ def main() -> None: required=True, dest="scenarios", ) + parser.add_argument( + '--habitats_path', + type=Path, + help="Path to directory containing different processed habitat layers", + required=False, + dest="habitats_path", + ) + parser.add_argument( + '--aohs_path', + type=Path, + help="Path to store AOHs in", + required=False, + dest="aohs_path", + ) args = parser.parse_args() - species_generator(args.data_dir, args.output, args.scenarios + DEFAULT_SCENARIOS) + species_generator( + args.data_dir, + args.output, + args.scenarios + DEFAULT_SCENARIOS, + args.habitats_path, + args.aohs_path + ) if __name__ == "__main__": main()