-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchelsa_preprocessing.py
More file actions
108 lines (87 loc) · 4.02 KB
/
chelsa_preprocessing.py
File metadata and controls
108 lines (87 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import pandas as pd
from tqdm import tqdm
import configparser
import glob
import xarray as xr
import geopandas as gpd
import os
import pyproj
import xagg as xa
# Point pyproj/PROJ at its bundled data directory before any CRS operation;
# without this, mismatched system/conda PROJ installs raise database errors.
os.environ['PROJ_LIB'] = pyproj.datadir.get_data_dir()
# Suppress xagg's per-call diagnostic printing for the whole session.
xa.set_options(silent=True)
def _aggregate_orography(base_dir, polygon, poly_name, elevation_output):
    """Compute the area-weighted mean elevation over *polygon* and write it to CSV.

    Skips gracefully (with a message) when no orography NetCDF is present,
    instead of raising IndexError on an empty glob result.
    """
    orog_files = sorted(glob.glob(f"{base_dir}/orog/*.nc"))
    if not orog_files:
        print("** No orography file found, skipping elevation output.")
        return
    ds_orog = xr.open_dataset(orog_files[0])
    try:
        weightmap_orog = xa.pixel_overlaps(ds_orog, polygon)
        agg_orog = xa.aggregate(ds_orog["orog"], weightmap_orog)
        mean_elevation = agg_orog.to_dataframe()["orog"].values[0]
    finally:
        # Close even if the overlap/aggregation fails.
        ds_orog.close()
    print(f"Area-weighted mean elevation: {mean_elevation:.2f} m")
    df_orog = pd.DataFrame({"polygon": [poly_name], "elevation": [round(mean_elevation, 2)]})
    df_orog.to_csv(elevation_output, index=False)


def _aggregate_variable(base_dir, var, polygon, conversion, fill_value, output_file):
    """Aggregate every NetCDF file of one variable over *polygon* into one CSV.

    The weight map is recomputed only when the lat/lon grid changes between
    files; per-file failures are reported and skipped rather than aborting.
    """
    files = sorted(glob.glob(f"{base_dir}/{var}/*.nc"))
    if not files:
        print(f"** No files found for {var}, skipping.")
        return
    results = []
    previous_grid = None
    weightmap = None
    for file in tqdm(files, desc=f"\u2192 {var}"):
        ds = xr.open_dataset(file)
        try:
            # Mask the missing-data sentinel BEFORE unit conversion; the
            # original order scaled the sentinel (e.g. 65535 * 86400 for pr,
            # 65535 - 273.15 for temperatures) so it was never caught and
            # leaked into the aggregates.
            ds[var] = ds[var].where(ds[var] != fill_value)
            ds[var] = ds[var] * conversion["factor"] + conversion["offset"]
            current_grid = (tuple(ds["lat"].values), tuple(ds["lon"].values))
            if current_grid != previous_grid:
                try:
                    weightmap = xa.pixel_overlaps(ds, polygon)
                    previous_grid = current_grid
                except Exception as e:
                    print(f"\nFailed to generate weight map for {file}: {e}")
                    continue
            try:
                agg = xa.aggregate(ds[var], weightmap)
                df = agg.to_dataframe().reset_index()[["time", var]]
                results.append(df.round(4))
            except Exception as e:
                print(f"\nFailed to aggregate {file}: {e}")
        finally:
            # Guarantee the dataset is closed on every path, including errors.
            ds.close()
    if not results:
        # Guard: pd.concat([]) raises ValueError when every file failed.
        print(f"** No aggregates produced for {var}, nothing written.")
        return
    pd.concat(results).to_csv(output_file, index=False)
    print(f"** Saved: {output_file}")


def chelsa_w5e5_agg(gpkg_path_or_layer, config_path="config.ini", force=False):
    """Area-weight-aggregate CHELSA-W5E5 data over a catchment polygon.

    Reads the [reanalysis] section of *config_path* for the CHELSA base
    directory, the variable list, and the temperature-conversion flag, then
    writes one CSV of area-weighted means per variable plus an elevation CSV
    into <chelsa_base_dir>/<chelsa_target_dir> (default "aggregates").

    Parameters
    ----------
    gpkg_path_or_layer : str
        Path to a GeoPackage with the polygon of interest; its basename
        (without extension) names the output files.
    config_path : str, optional
        INI file with a [reanalysis] section: chelsa_base_dir, chelsa_vars
        (comma-separated), convert_temperatures, optional chelsa_target_dir.
    force : bool, optional
        If True, recompute even when all output CSVs already exist.

    Returns
    -------
    None
        Results are written to disk as CSV files.
    """
    config = configparser.ConfigParser()
    config.read(config_path)

    base_dir = config["reanalysis"]["chelsa_base_dir"]
    target_subdir = config.get("reanalysis", "chelsa_target_dir", fallback="aggregates")
    output_dir = os.path.join(base_dir, target_subdir)
    os.makedirs(output_dir, exist_ok=True)

    variables = [v.strip() for v in config["reanalysis"]["chelsa_vars"].split(",")]
    convert_temp = config.getboolean("reanalysis", "convert_temperatures")

    # CHELSA-W5E5 uses 65535 as the missing-data sentinel in its rasters.
    fill_value = 65535
    # Unit conversions: pr kg m-2 s-1 -> mm day-1; temperatures K -> degC
    # (only when convert_temperatures is set); rsds passed through unchanged.
    temp_offset = -273.15 if convert_temp else 0
    conversions = {
        "pr": {"factor": 86400, "offset": 0},
        "tas": {"factor": 1, "offset": temp_offset},
        "tasmax": {"factor": 1, "offset": temp_offset},
        "tasmin": {"factor": 1, "offset": temp_offset},
        "rsds": {"factor": 1, "offset": 0},
    }
    # Identity fallback generalizes to variables beyond the hard-coded five
    # instead of raising KeyError; their values are passed through unscaled.
    identity = {"factor": 1, "offset": 0}

    polygon = gpd.read_file(gpkg_path_or_layer)
    poly_name = os.path.splitext(os.path.basename(gpkg_path_or_layer))[0]
    polygon = polygon.to_crs("EPSG:4326")

    # Skip all work when every per-variable CSV and the elevation CSV exist.
    existing_outputs = [os.path.join(output_dir, f"{poly_name}_{var}.csv") for var in variables]
    elevation_output = os.path.join(output_dir, f"{poly_name}_elevation.csv")
    all_exist = all(os.path.exists(f) for f in existing_outputs) and os.path.exists(elevation_output)
    if all_exist and not force:
        print(f"CHELSA output for '{poly_name}' already exists. Skipping.")
        return

    _aggregate_orography(base_dir, polygon, poly_name, elevation_output)

    for var in variables:
        print(f"\n ** Processing variable: {var}")
        output_file = os.path.join(output_dir, f"{poly_name}_{var}.csv")
        _aggregate_variable(
            base_dir, var, polygon, conversions.get(var, identity), fill_value, output_file
        )
    print("\n** Done.")
# Example usage
# chelsa_w5e5_agg("/home/phillip/Seafile/EBA-CA/Papers/No3_Issyk-Kul/geodata/catchments/cholpon_ata/gpkg_glaciers/cholpon_ata_catchment.gpkg")