|
# make_bundle.py
"""Build data_bundle.json from the scale-up result pickles for the JS front end."""
import json
from pathlib import Path

import numpy as np
import pandas as pd

# -----------------------------
# CONFIG — file paths
# -----------------------------
file_path_main = "../results/Scale_up_output_MS.pkl"
file_path_percent = "../results/Scale_up_PERCENT_ECW_POLL_MS.pkl"
file_path_cr_man = "../results/Scale_up_CR_MAN_MS.pkl"
file_path_cr_repur = "../results/Scale_up_CR_REPUR_MS.pkl"
file_path_coalbag = "../results/Scale_up_COALBAG_MS.pkl"
file_path_cr_stock = "../results/Scale_up_CR_STOCK.pkl"

# If your notebook already built this mapping, you can paste it here.
# Otherwise we'll rebuild from country_converter like your notebook does.
# The 22 UN (M49) sub-regions the result pickles are indexed by.
UNRegion_list = ['Australia and New Zealand','Caribbean','Central America','Central Asia',
                 'Eastern Africa','Eastern Asia','Eastern Europe','Melanesia','Micronesia',
                 'Middle Africa','Northern Africa','Northern America','Northern Europe',
                 'Polynesia','South America','South-eastern Asia','Southern Africa',
                 'Southern Asia','Southern Europe','Western Africa','Western Asia','Western Europe']
| 22 | + |
# -----------------------------
# Helpers
# -----------------------------
def coerce_numeric(v):
    """Best-effort conversion of *v* to a finite float.

    Handles:
      - plain numbers (int/float/numpy scalars) and numeric strings
      - uncertainties.ufloat objects, via their ``nominal_value``
        attribute (no import of ``uncertainties`` needed here)

    Returns None for anything that cannot be converted — and also for
    NaN/inf results, because ``json.dumps`` would otherwise emit
    ``NaN``/``Infinity``, which is not valid JSON for the JS consumer.
    """
    import math  # local import: keeps the module's dependency block unchanged

    nominal = getattr(v, "nominal_value", None)  # ufloat support
    if nominal is not None:
        v = nominal
    try:
        f = float(v)
    except (TypeError, ValueError):
        return None
    return f if math.isfinite(f) else None
| 42 | + |
def clean_df_numeric(df):
    """Return a copy of *df* with every cell coerced to float (others -> None).

    Uses ``DataFrame.map`` on pandas >= 2.1; falls back to the deprecated
    ``DataFrame.applymap`` on older pandas versions (same semantics).
    """
    mapper = getattr(df, "map", None)
    if mapper is None:  # pandas < 2.1
        mapper = df.applymap
    return mapper(coerce_numeric)
| 46 | + |
def df_to_weekmap(df):
    """Convert a region-by-week DataFrame into nested plain dicts.

    *df* is indexed by UN region with week identifiers (int/str) as
    columns. Returns { region: { '1': value, '2': value, ... } },
    keeping only columns whose label parses as an integer week number
    and stringifying the week keys.
    """
    weekmap = {}
    for region, row in df.iterrows():
        per_week = {}
        for label, cell in row.items():
            # keep only columns that look like week numbers
            try:
                week_key = str(int(label))
            except Exception:
                continue
            per_week[week_key] = coerce_numeric(cell)
        weekmap[region] = per_week
    return weekmap
| 66 | + |
def load_nominal_pickle(path):
    """Load one result pickle and return ``(df_nominal, week_cols)``.

    ``df_nominal`` contains only the week columns — everything after the
    first two metadata columns (the ECW bounds) — with every cell coerced
    to float or None. ``week_cols`` is the corresponding column index.
    Rows are restricted to the regions in UNRegion_list, as in the notebook.
    """
    raw = pd.read_pickle(path)
    # keep only target UN regions (same as notebook)
    raw = raw.loc[UNRegion_list]
    # Columns 0-1 are the ECW bound metadata; weeks start at column 2.
    # (If the pickle schema differs, adjust this slice.)
    week_cols = raw.columns[2:]
    df_nominal = clean_df_numeric(raw[week_cols].copy())
    return df_nominal, week_cols
| 82 | + |
# -----------------------------
# Build region_to_iso via country_converter (like your notebook)
# -----------------------------
def build_region_to_iso():
    """Map each target UN region to its list of ISO3 country codes.

    Uses the country table bundled with country_converter (columns
    "UNregion" and "ISO3"; some rows may hold NaN). Exits with an
    install hint when the package is missing.
    """
    try:
        import country_converter as coco
    except ImportError:
        raise SystemExit("Please install country_converter: pip install country_converter")

    table = coco.CountryConverter().data
    region_to_iso = {}
    for region in UNRegion_list:
        codes = table.loc[table["UNregion"] == region, "ISO3"].dropna().tolist()
        region_to_iso[region] = [str(code) for code in codes if isinstance(code, str)]
    return region_to_iso
| 100 | + |
# -----------------------------
# MAIN
# -----------------------------
def main():
    """Assemble all result pickles into a single data_bundle.json bundle."""
    print("[1/6] Loading datasets...")
    # Dataset name (as consumed by the JS front end) -> source pickle path.
    sources = [
        ("ALL", file_path_main),
        ("ECW Coverage %", file_path_percent),
        ("CR Box Manufacturing", file_path_cr_man),
        ("CR Box Repurposing", file_path_cr_repur),
        ("Coalbaghouse", file_path_coalbag),
        ("CR Box Stock", file_path_cr_stock),
    ]
    loaded = {name: load_nominal_pickle(path) for name, path in sources}

    # sanity: derive weeks from the largest span we saw.
    # Prefer numeric weeks 1..N in the pickles; serialize keys as strings.
    max_weeks = max(len(week_cols) for _, week_cols in loaded.values())
    weeks = [str(i) for i in range(1, max_weeks + 1)]
    print(f"[2/6] Weeks detected: 1..{max_weeks}")

    print("[3/6] Building region_to_iso mapping...")
    region_to_iso = build_region_to_iso()

    print("[4/6] Converting dataframes to JSON structures...")
    datasets = {name: df_to_weekmap(df) for name, (df, _cols) in loaded.items()}

    print("[5/6] Extracting ECW per-person bounds (lower/upper) from main pickle...")
    # Reload the main pickle in full: its first two columns hold the ECW
    # lower/upper multipliers *per person* (load_nominal_pickle drops them).
    df_ecw_full = pd.read_pickle(file_path_main).loc[UNRegion_list]
    ecw_cols = df_ecw_full.columns[:2]
    ecw_per_person = {}
    for region in UNRegion_list:
        bounds = df_ecw_full.loc[region, ecw_cols]
        ecw_per_person[region] = {
            "lower": coerce_numeric(bounds.iloc[0]),  # multiplier per person
            "upper": coerce_numeric(bounds.iloc[1]),
        }

    bundle = {
        "weeks": weeks,
        "un_regions": UNRegion_list,
        "region_to_iso": region_to_iso,    # { UN region: [ISO3,...] }
        "datasets": datasets,              # { dataset_name: { region: { "1": val, ... } } }
        "ecw_per_person": ecw_per_person,  # { region: {lower:x, upper:y} } (multiply by CADRPP in JS)
    }

    out_path = Path("data_bundle.json")
    # Explicit encoding: without it write_text uses the locale's preferred
    # encoding, making the output platform-dependent.
    out_path.write_text(json.dumps(bundle), encoding="utf-8")
    print(f"[6/6] Wrote {out_path.resolve()}")

if __name__ == "__main__":
    main()
0 commit comments