From 365bbf4155576e5846c280abf5a2daa561a5a753 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Mon, 19 Dec 2022 16:50:50 -0600
Subject: [PATCH 01/11] Add test script to process nwm forcing into ngen input

---
 ngen_forcing/process_nwm_forcing.py | 68 +++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 ngen_forcing/process_nwm_forcing.py

diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
new file mode 100644
index 0000000..a9e1ec8
--- /dev/null
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -0,0 +1,68 @@
+# import rioxarray as rxr
+import xarray as xr
+import geopandas as gpd
+from rasterstats import zonal_stats
+# import rasterio
+import pandas as pd
+
+import warnings
+warnings.simplefilter("ignore")
+
+# Read forcing files
+# Generate List of files
+
+# TODO: Add looping through lists of forcing files
+# consider looking at the "listofnwmfilenames.py" in the data_access_examples repository.
+# Integer values for runinput, varinput, etc. are listed at the top of the file
+# and an example is given in the `main` function. 
+
+# import listofnwmfilenames
+#     create_file_list(
+#         runinput,
+#         varinput,
+#         geoinput,
+#         meminput,
+#         start_date,
+#         end_date,
+#         fcst_cycle,
+#     )
+
+#for file in list_of_files:
+fc_nc = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
+fc_xds = xr.open_dataset(fc_nc)
+
+reng = "rasterio"
+fc_xds = xr.open_dataset(fc_nc, engine=reng)
+
+# Read basin boundary file
+f_03 = "03w/nextgen_03W.gpkg"
+
+gpkg_03w_divides = gpd.read_file(f_03, layer="divides")
+
+list_of_vars = [
+    "U2D"      ,
+    "V2D"     ,
+    "LWDOWN" ,
+    "RAINRATE",
+    "T2D"   ,
+    "Q2D"   ,
+    "PSFC" ,
+    "SWDOWN",
+]
+
+df_dict = {}
+for _v in list_of_vars:
+    with xr.open_dataset(fc_nc, engine=reng) as _xds:
+        _src = _xds[_v]
+        _aff2 = _src.rio.transform()
+        _arr2 = _src.values[0]
+        _df_zonal_stats = pd.DataFrame(zonal_stats(gpkg_03w_divides, _arr2, affine=_aff2))
+    
+    df_dict[_v] = pd.DataFrame(index=_df_zonal_stats.index)
+    # adding statistics back to original GeoDataFrame
+    # gdf3 = pd.concat([gpkg_03w_divides, _df_zonal_stats], axis=1)
+    df_dict[_v][fc_xds.time.values[0]]=_df_zonal_stats["mean"]
+
+# TODO: Convert the output to CSV with something like
+# `gdf3.to_csv`
+

From 58381b4aa21490034438cecff52a8b7b76616435 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Tue, 20 Dec 2022 12:11:54 -0600
Subject: [PATCH 02/11] rewrite to include file-loop

---
 ngen_forcing/process_nwm_forcing.py | 52 +++++++++++++++++++----------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
index a9e1ec8..d3a3389 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -5,6 +5,7 @@
 # import rasterio
 import pandas as pd
 
+from pathlib import Path
 import warnings
 warnings.simplefilter("ignore")
 
@@ -27,18 +28,23 @@
 #         fcst_cycle,
 #     )
 
-#for file in list_of_files:
-fc_nc = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
-fc_xds = xr.open_dataset(fc_nc)
+"""
+A set of test files can be generated downloading these files
+wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f001.conus.nc
+wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f002.conus.nc
+wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f003.conus.nc
+"""
 
-reng = "rasterio"
-fc_xds = xr.open_dataset(fc_nc, engine=reng)
+folder_prefix = Path('data')
+list_of_files = [
+    "nwm.t12z.medium_range.forcing.f001.conus.nc",
+    "nwm.t12z.medium_range.forcing.f002.conus.nc",
+    "nwm.t12z.medium_range.forcing.f003.conus.nc",
+]
 
 # Read basin boundary file
 f_03 = "03w/nextgen_03W.gpkg"
-
-gpkg_03w_divides = gpd.read_file(f_03, layer="divides")
-
+gpkg_divides = gpd.read_file(f_03, layer="divides")
 list_of_vars = [
     "U2D"      ,
     "V2D"     ,
@@ -52,16 +58,26 @@
 
 df_dict = {}
 for _v in list_of_vars:
-    with xr.open_dataset(fc_nc, engine=reng) as _xds:
-        _src = _xds[_v]
-        _aff2 = _src.rio.transform()
-        _arr2 = _src.values[0]
-        _df_zonal_stats = pd.DataFrame(zonal_stats(gpkg_03w_divides, _arr2, affine=_aff2))
-    
-    df_dict[_v] = pd.DataFrame(index=_df_zonal_stats.index)
-    # adding statistics back to original GeoDataFrame
-    # gdf3 = pd.concat([gpkg_03w_divides, _df_zonal_stats], axis=1)
-    df_dict[_v][fc_xds.time.values[0]]=_df_zonal_stats["mean"]
+    df_dict[_v] = pd.DataFrame(index=gpkg_divides.index)
+
+reng = "rasterio"
+sum_stat = "mean"
+
+for _nc_file in list_of_files:
+    # _nc_file = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
+    _full_nc_file = (folder_prefix.joinpath(_nc_file))
+
+    with xr.open_dataset(_full_nc_file, engine=reng) as _xds:
+        for _v in list_of_vars:
+            _src = _xds[_v]
+            _aff2 = _src.rio.transform()
+            _arr2 = _src.values[0]
+
+            breakpoint()
+            _df_zonal_stats = pd.DataFrame(zonal_stats(gpkg_divides, _arr2, affine=_aff2))
+            # if adding statistics back to original GeoDataFrame
+            # gdf3 = pd.concat([gpkg_divides, _df_zonal_stats], axis=1)
+            df_dict[_v][_xds.time.values[0]]=_df_zonal_stats[sum_stat]
 
 # TODO: Convert the output to CSV with something like
 # `gdf3.to_csv`

From b3c024d83bdca8a9bc06f611f91c89506f3beef3 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Tue, 20 Dec 2022 12:16:08 -0600
Subject: [PATCH 03/11] include gpkg download in example

---
 ngen_forcing/process_nwm_forcing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
index d3a3389..d7d86ae 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -33,6 +33,7 @@
 wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f001.conus.nc
 wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f002.conus.nc
 wget -P data -c https://storage.googleapis.com/national-water-model/nwm.20220824/forcing_medium_range/nwm.t12z.medium_range.forcing.f003.conus.nc
+wget -P 03w -c https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_03W.gpkg
 """
 
 folder_prefix = Path('data')

From 9c35af78e6eb898fe195902aba3927e0deae7709 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Thu, 22 Dec 2022 12:17:22 -0600
Subject: [PATCH 04/11] add optimized loop and port to functions

---
 .../TestConvert_NWMForcing_to_Ngen.py         | 178 ++++++++++++++++++
 ngen_forcing/defs.py                          |  18 ++
 ngen_forcing/process_nwm_forcing.py           | 167 +++++++++++-----
 3 files changed, 319 insertions(+), 44 deletions(-)
 create mode 100644 ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
 create mode 100644 ngen_forcing/defs.py

diff --git a/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py b/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
new file mode 100644
index 0000000..8f4158f
--- /dev/null
+++ b/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
@@ -0,0 +1,178 @@
+from defs import xr_read_window, polymask
+from rasterio import _io, windows
+import concurrent.futures
+import xarray as xr
+
+
+class MemoryDataset(_io.MemoryDataset, windows.WindowMethodsMixin):
+    pass
+
+
+def junk():
+
+    flist = gpkg_divides.geometry.to_list()
+    polys = flist
+
+
+def get_forcing_dict_newway(
+    feature_list,
+    folder_prefix,
+    file_list,
+):
+
+    reng = "rasterio"
+    _xds = xr.open_dataset(folder_prefix.joinpath(file_list[0]), engine=reng)
+    _template_arr = _xds.U2D.values
+    _u2d = MemoryDataset(
+        _template_arr,
+        transform=_xds.U2D.rio.transform(),
+        gcps=None,
+        rpcs=None,
+        crs=None,
+        copy=False,
+    )
+
+    filehandles = [xr.open_dataset("data/" + f) for f in file_list]
+    stats = []
+    for i, m in enumerate(map(polymask(_u2d), feature_list)):
+        print(f"{i}, {round(i/len(feature_list), 5)*100}".ljust(40), end="\r")
+        mask, _, window = m
+        mask = xr.DataArray(mask, dims=["y", "x"])
+        winslices = dict(zip(["y", "x"], window.toslices()))
+        for f in filehandles:
+            cropped = xr_read_window(f, winslices, mask=mask)
+            stats.append(cropped.mean())
+    [f.close() for f in filehandles]  # Returns None for each file
+
+    return stats
+
+
+def get_forcing_dict_newway_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+        ):
+
+    reng = "rasterio"
+    _xds = xr.open_dataset(folder_prefix.joinpath(file_list[0]), engine=reng)
+    _template_arr = _xds.U2D.values
+
+    _u2d = MemoryDataset(
+        _template_arr,
+        transform=_xds.U2D.rio.transform(),
+        gcps=None,
+        rpcs=None,
+        crs=None,
+        copy=False,
+    )
+    filehandles = [xr.open_dataset("data/" + f) for f in file_list]
+
+    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
+    # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        stats = []
+        future_list = []
+
+        for i, m in enumerate(map(polymask(_u2d), feature_list)):
+            print(f"{i}, {round(i/len(feature_list), 5)*100}".ljust(40), end="\r")
+            mask, _, window = m
+            mask = xr.DataArray(mask, dims=["y", "x"])
+            winslices = dict(zip(["y", "x"], window.toslices()))
+            for f in filehandles:
+                future = executor.submit(xr_read_window, f, winslices, mask=mask)
+                # cropped = xr_read_window(f, winslices, mask=mask)
+                # stats.append(cropped.mean())
+                future_list.append(future)
+        for _f in concurrent.futures.as_completed(future_list):
+            stats.append(_f.result().mean())
+
+    [f.close() for f in filehandles]
+    return stats
+
+
+def get_forcing_dict_newway_inverted(
+    feature_list,
+    folder_prefix,
+    file_list,
+):
+
+    reng = "rasterio"
+    _xds = xr.open_dataset(folder_prefix.joinpath(file_list[0]), engine=reng)
+    _template_arr = _xds.U2D.values
+    _u2d = MemoryDataset(
+        _template_arr,
+        transform=_xds.U2D.rio.transform(),
+        gcps=None,
+        rpcs=None,
+        crs=None,
+        copy=False,
+    )
+
+    filehandles = [xr.open_dataset("data/" + f) for f in file_list]
+    stats = []
+    future_list = []
+    mask_win_list = []
+
+    for i, m in enumerate(map(polymask(_u2d), feature_list)):
+        print(f"{i}, {round(i/len(feature_list), 5)*100}".ljust(40), end="\r")
+        mask, _, window = m
+        mask = xr.DataArray(mask, dims=["y", "x"])
+        winslices = dict(zip(["y", "x"], window.toslices()))
+        mask_win_list.append((mask, winslices))
+
+    for f in filehandles:
+        print(f"{i}, {round(i/len(file_list), 2)*100}".ljust(40), end="\r")
+        for _m, _w in mask_win_list:
+            cropped = xr_read_window(f, _w, mask=_m)
+            stats.append(cropped.mean())
+
+    [f.close() for f in filehandles]
+    return stats
+
+
+def get_forcing_dict_newway_inverted_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+        ):
+
+    import concurrent.futures
+
+    reng = "rasterio"
+    _xds = xr.open_dataset(folder_prefix.joinpath(file_list[0]), engine=reng)
+    _template_arr = _xds.U2D.values
+    _u2d = MemoryDataset(
+        _template_arr,
+        transform=_xds.U2D.rio.transform(),
+        gcps=None,
+        rpcs=None,
+        crs=None,
+        copy=False,
+    )
+
+    mask_win_list = []
+    for i, m in enumerate(map(polymask(_u2d), feature_list)):
+        print(f"{i}, {round(i/len(feature_list), 5)*100}".ljust(40), end="\r")
+        mask, _, window = m
+        mask = xr.DataArray(mask, dims=["y", "x"])
+        winslices = dict(zip(["y", "x"], window.toslices()))
+        mask_win_list.append((mask, winslices))
+
+    filehandles = [xr.open_dataset("data/" + f) for f in file_list]
+    stats = []
+    future_list = []
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
+    # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+
+        for f in filehandles:
+            print(f"{i}, {round(i/len(file_list), 2)*100}".ljust(40), end="\r")
+            for _m, _w in mask_win_list:
+                future = executor.submit(xr_read_window, f, _w, mask=_m)
+                # cropped = xr_read_window(f, _w, mask=_m)
+                # stats.append(cropped.mean())
+                future_list.append(future)
+        for _f in concurrent.futures.as_completed(future_list):
+            stats.append(_f.result().mean())
+
+    [f.close() for f in filehandles]
+    return stats
diff --git a/ngen_forcing/defs.py b/ngen_forcing/defs.py
new file mode 100644
index 0000000..58b2c1d
--- /dev/null
+++ b/ngen_forcing/defs.py
@@ -0,0 +1,18 @@
+import rasterio.mask as riomask
+
+
+def polymask(dataset, invert=False, all_touched=False):
+    def _polymask(poly):
+        return riomask.raster_geometry_mask(
+            dataset, [poly], invert=invert, all_touched=all_touched, crop=True
+        )
+
+    return _polymask
+
+
+def xr_read_window(ds, window, mask=None):
+    data = ds.isel(window)
+    if mask is None:
+        return data
+    else:
+        return data.where(mask)
diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
index d7d86ae..20a2300 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -2,11 +2,22 @@
 import xarray as xr
 import geopandas as gpd
 from rasterstats import zonal_stats
+
 # import rasterio
 import pandas as pd
 
+import time
+
+from TestConvert_NWMForcing_to_Ngen import (
+    get_forcing_dict_newway,
+    get_forcing_dict_newway_parallel,
+    get_forcing_dict_newway_inverted,
+    get_forcing_dict_newway_inverted_parallel,
+)
+
 from pathlib import Path
 import warnings
+
 warnings.simplefilter("ignore")
 
 # Read forcing files
@@ -15,7 +26,7 @@
 # TODO: Add looping through lists of forcing files
 # consider looking at the "listofnwmfilenames.py" in the data_access_examples repository.
 # Integer values for runinput, varinput, etc. are listed at the top of the file
-# and an example is given in the `main` function. 
+# and an example is given in the `main` function.
 
 # import listofnwmfilenames
 #     create_file_list(
@@ -36,50 +47,118 @@
 wget -P 03w -c https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_03W.gpkg
 """
 
-folder_prefix = Path('data')
-list_of_files = [
-    "nwm.t12z.medium_range.forcing.f001.conus.nc",
-    "nwm.t12z.medium_range.forcing.f002.conus.nc",
-    "nwm.t12z.medium_range.forcing.f003.conus.nc",
-]
-
-# Read basin boundary file
-f_03 = "03w/nextgen_03W.gpkg"
-gpkg_divides = gpd.read_file(f_03, layer="divides")
-list_of_vars = [
-    "U2D"      ,
-    "V2D"     ,
-    "LWDOWN" ,
-    "RAINRATE",
-    "T2D"   ,
-    "Q2D"   ,
-    "PSFC" ,
-    "SWDOWN",
-]
-
-df_dict = {}
-for _v in list_of_vars:
-    df_dict[_v] = pd.DataFrame(index=gpkg_divides.index)
-
-reng = "rasterio"
-sum_stat = "mean"
-
-for _nc_file in list_of_files:
-    # _nc_file = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
-    _full_nc_file = (folder_prefix.joinpath(_nc_file))
-
-    with xr.open_dataset(_full_nc_file, engine=reng) as _xds:
-        for _v in list_of_vars:
-            _src = _xds[_v]
-            _aff2 = _src.rio.transform()
-            _arr2 = _src.values[0]
-
-            breakpoint()
-            _df_zonal_stats = pd.DataFrame(zonal_stats(gpkg_divides, _arr2, affine=_aff2))
-            # if adding statistics back to original GeoDataFrame
-            # gdf3 = pd.concat([gpkg_divides, _df_zonal_stats], axis=1)
-            df_dict[_v][_xds.time.values[0]]=_df_zonal_stats[sum_stat]
+
+def get_forcing_dict(
+    gpkg_divides,
+    folder_prefix,
+    filelist,
+    var_list,
+):
+    reng = "rasterio"
+    sum_stat = "mean"
+
+    df_dict = {}
+    for _v in var_list:
+        df_dict[_v] = pd.DataFrame(index=gpkg_divides.index)
+
+    for _nc_file in filelist:
+        # _nc_file = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
+        _full_nc_file = folder_prefix.joinpath(_nc_file)
+
+        with xr.open_dataset(_full_nc_file, engine=reng) as _xds:
+            for _v in var_list:
+                _src = _xds[_v]
+                _aff2 = _src.rio.transform()
+                _arr2 = _src.values[0]
+
+                _df_zonal_stats = pd.DataFrame(
+                    zonal_stats(gpkg_divides, _arr2, affine=_aff2)
+                )
+                # if adding statistics back to original GeoDataFrame
+                # gdf3 = pd.concat([gpkg_divides, _df_zonal_stats], axis=1)
+                df_dict[_v][_xds.time.values[0]] = _df_zonal_stats[sum_stat]
+
+    return df_dict
+
 
 # TODO: Convert the output to CSV with something like
 # `gdf3.to_csv`
 
+
+def main():
+
+    folder_prefix = Path("data")
+    list_of_files = [
+        f"nwm.t12z.medium_range.forcing.f{_r:03}.conus.nc" for _r in range(1, 241)
+    ]
+
+    # Read basin boundary file
+    f_03 = "03w/nextgen_03W.gpkg"
+    gpkg_divides = gpd.read_file(f_03, layer="divides")
+    var_list = [
+        "U2D",
+        "V2D",
+        "LWDOWN",
+        "RAINRATE",
+        "T2D",
+        "Q2D",
+        "PSFC",
+        "SWDOWN",
+    ]
+
+    file_list = list_of_files[0:30]
+    gpkg_subset = gpkg_divides[0:2000]
+    #file_list = list_of_files[0:3]
+    #gpkg_subset = gpkg_divides[0:20]
+    feature_list = gpkg_subset.geometry.to_list()
+
+    # start_time = time.time()
+    # print(f"Working on the original way")
+    # forcing_dict = get_forcing_dict(
+    #     gpkg_subset,
+    #     folder_prefix,
+    #     file_list,
+    #     var_list,
+    #     )
+    # print(time.time() - start_time)
+
+    start_time = time.time()
+    print(f"Working on the new way")
+    fd2 = get_forcing_dict_newway(
+        feature_list,
+        folder_prefix,
+        file_list,
+    )
+    print(time.time() - start_time)
+
+    start_time = time.time()
+    print(f"Working on the new way with threading. (It's not much better.)")
+    fd3 = get_forcing_dict_newway_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+        )
+    print(time.time() - start_time)
+
+
+    start_time = time.time()
+    print(f"Working on the new way with loops reversed.")
+    fd4 = get_forcing_dict_newway_inverted(
+        feature_list,
+        folder_prefix,
+        file_list,
+    )
+    print(time.time() - start_time)
+
+    start_time = time.time()
+    print(f"Working on the new way with loops reversed with threading.")
+    fd4 = get_forcing_dict_newway_inverted_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+    )
+    print(time.time() - start_time)
+
+
+if __name__ == "__main__":
+    main()

From e90c81b54b53811d66ae6691672fea3dc0afd3b3 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Thu, 22 Dec 2022 12:51:18 -0600
Subject: [PATCH 05/11] add parallel options

---
 .../TestConvert_NWMForcing_to_Ngen.py         | 40 +++++++++++++------
 ngen_forcing/process_nwm_forcing.py           | 39 ++++++++++++++----
 2 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py b/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
index 8f4158f..2a34f9a 100644
--- a/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
+++ b/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
@@ -48,10 +48,12 @@ def get_forcing_dict_newway(
 
 
 def get_forcing_dict_newway_parallel(
-        feature_list,
-        folder_prefix,
-        file_list,
-        ):
+    feature_list,
+    folder_prefix,
+    file_list,
+    para="thread",
+    para_n=2,
+):
 
     reng = "rasterio"
     _xds = xr.open_dataset(folder_prefix.joinpath(file_list[0]), engine=reng)
@@ -67,8 +69,14 @@ def get_forcing_dict_newway_parallel(
     )
     filehandles = [xr.open_dataset("data/" + f) for f in file_list]
 
-    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
-    # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+    if para == "process":
+        pool = concurrent.futures.ProcessPoolExecutor
+    elif para == "thread":
+        pool = concurrent.futures.ThreadPoolExecutor
+    else:
+        pool = concurrent.futures.ThreadPoolExecutor
+
+    with pool(max_workers=para_n) as executor:
         stats = []
         future_list = []
 
@@ -130,10 +138,12 @@ def get_forcing_dict_newway_inverted(
 
 
 def get_forcing_dict_newway_inverted_parallel(
-        feature_list,
-        folder_prefix,
-        file_list,
-        ):
+    feature_list,
+    folder_prefix,
+    file_list,
+    para="thread",
+    para_n=2,
+):
 
     import concurrent.futures
 
@@ -161,8 +171,14 @@ def get_forcing_dict_newway_inverted_parallel(
     stats = []
     future_list = []
 
-    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
-    # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+    if para == "process":
+        pool = concurrent.futures.ProcessPoolExecutor
+    elif para == "thread":
+        pool = concurrent.futures.ThreadPoolExecutor
+    else:
+        pool = concurrent.futures.ThreadPoolExecutor
+
+    with pool(max_workers=para_n) as executor:
 
         for f in filehandles:
             print(f"{i}, {round(i/len(file_list), 2)*100}".ljust(40), end="\r")
diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
index 20a2300..5e1430a 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -106,10 +106,10 @@ def main():
         "SWDOWN",
     ]
 
-    file_list = list_of_files[0:30]
-    gpkg_subset = gpkg_divides[0:2000]
-    #file_list = list_of_files[0:3]
-    #gpkg_subset = gpkg_divides[0:20]
+    # file_list = list_of_files[0:30]
+    # gpkg_subset = gpkg_divides[0:2000]
+    file_list = list_of_files[0:3]
+    gpkg_subset = gpkg_divides[0:200]
     feature_list = gpkg_subset.geometry.to_list()
 
     # start_time = time.time()
@@ -132,14 +132,26 @@ def main():
     print(time.time() - start_time)
 
     start_time = time.time()
-    print(f"Working on the new way with threading. (It's not much better.)")
+    print(f"Working on the new way with threading parallel.")
     fd3 = get_forcing_dict_newway_parallel(
         feature_list,
         folder_prefix,
         file_list,
-        )
+        para="thread",
+        para_n=16,
+    )
     print(time.time() - start_time)
 
+    start_time = time.time()
+    print(f"Working on the new way with process parallel.")
+    fd3 = get_forcing_dict_newway_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+        para="process",
+        para_n=16,
+    )
+    print(time.time() - start_time)
 
     start_time = time.time()
     print(f"Working on the new way with loops reversed.")
@@ -151,11 +163,24 @@ def main():
     print(time.time() - start_time)
 
     start_time = time.time()
-    print(f"Working on the new way with loops reversed with threading.")
+    print(f"Working on the new way with loops reversed with threading parallel.")
+    fd4 = get_forcing_dict_newway_inverted_parallel(
+        feature_list,
+        folder_prefix,
+        file_list,
+        para="thread",
+        para_n=16,
+    )
+    print(time.time() - start_time)
+
+    start_time = time.time()
+    print(f"Working on the new way with loops reversed with process parallel.")
     fd4 = get_forcing_dict_newway_inverted_parallel(
         feature_list,
         folder_prefix,
         file_list,
+        para="process",
+        para_n=16,
     )
     print(time.time() - start_time)
 

From d1cf4282388f877d117ffc150740934eb51eaa35 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Mon, 2 Jan 2023 14:45:09 -0600
Subject: [PATCH 06/11] explain commented test block

---
 ngen_forcing/process_nwm_forcing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/process_nwm_forcing.py
index 5e1430a..2232691 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/process_nwm_forcing.py
@@ -112,6 +112,8 @@ def main():
     gpkg_subset = gpkg_divides[0:200]
     feature_list = gpkg_subset.geometry.to_list()
 
+    # This way is extremely slow for anything more than a
+    # few files, so we comment it out of the test
     # start_time = time.time()
     # print(f"Working on the original way")
     # forcing_dict = get_forcing_dict(

From 1f2c9906233d45aeefc43101a934915cfd9ba756 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Mon, 2 Jan 2023 14:57:45 -0600
Subject: [PATCH 07/11] better file names for process script

---
 ...ert_NWMForcing_to_Ngen.py => process_nwm_forcing_to_ngen.py} | 0
 ...ocess_nwm_forcing.py => test_process_nwm_forcing_to_ngen.py} | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename ngen_forcing/{TestConvert_NWMForcing_to_Ngen.py => process_nwm_forcing_to_ngen.py} (100%)
 rename ngen_forcing/{process_nwm_forcing.py => test_process_nwm_forcing_to_ngen.py} (99%)

diff --git a/ngen_forcing/TestConvert_NWMForcing_to_Ngen.py b/ngen_forcing/process_nwm_forcing_to_ngen.py
similarity index 100%
rename from ngen_forcing/TestConvert_NWMForcing_to_Ngen.py
rename to ngen_forcing/process_nwm_forcing_to_ngen.py
diff --git a/ngen_forcing/process_nwm_forcing.py b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
similarity index 99%
rename from ngen_forcing/process_nwm_forcing.py
rename to ngen_forcing/test_process_nwm_forcing_to_ngen.py
index 2232691..5850bfb 100644
--- a/ngen_forcing/process_nwm_forcing.py
+++ b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
@@ -8,7 +8,7 @@
 
 import time
 
-from TestConvert_NWMForcing_to_Ngen import (
+from process_nwm_forcing_to_ngen import (
     get_forcing_dict_newway,
     get_forcing_dict_newway_parallel,
     get_forcing_dict_newway_inverted,

From 1039630130925577a8b857db1f8c2959b758c5a6 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Fri, 27 Jan 2023 13:48:06 -0600
Subject: [PATCH 08/11] turn on slow test in committed file

---
 .../test_process_nwm_forcing_to_ngen.py        | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/ngen_forcing/test_process_nwm_forcing_to_ngen.py b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
index 5850bfb..a7d1404 100644
--- a/ngen_forcing/test_process_nwm_forcing_to_ngen.py
+++ b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
@@ -114,15 +114,15 @@ def main():
 
     # This way is extremely slow for anything more than a
     # few files, so we comment it out of the test
-    # start_time = time.time()
-    # print(f"Working on the original way")
-    # forcing_dict = get_forcing_dict(
-    #     gpkg_subset,
-    #     folder_prefix,
-    #     file_list,
-    #     var_list,
-    #     )
-    # print(time.time() - start_time)
+    start_time = time.time()
+    print(f"Working on the old (slow) way")
+    fd1 = get_forcing_dict(
+        gpkg_subset,
+        folder_prefix,
+        file_list,
+        var_list,
+    )
+    print(time.time() - start_time)
 
     start_time = time.time()
     print(f"Working on the new way")

From 4ad394c88382dc919de8ce7401587d7d0ff6eeee Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Mon, 30 Jan 2023 11:55:36 -0600
Subject: [PATCH 09/11] hold file handles for slow method

---
 ngen_forcing/test_process_nwm_forcing_to_ngen.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ngen_forcing/test_process_nwm_forcing_to_ngen.py b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
index a7d1404..5fe24a3 100644
--- a/ngen_forcing/test_process_nwm_forcing_to_ngen.py
+++ b/ngen_forcing/test_process_nwm_forcing_to_ngen.py
@@ -61,11 +61,16 @@ def get_forcing_dict(
     for _v in var_list:
         df_dict[_v] = pd.DataFrame(index=gpkg_divides.index)
 
+    ds_list = []
     for _nc_file in filelist:
         # _nc_file = ("nwm.t00z.medium_range.forcing.f001.conus.nc")
         _full_nc_file = folder_prefix.joinpath(_nc_file)
+        ds_list.append(xr.open_dataset(_full_nc_file, engine=reng))
 
-        with xr.open_dataset(_full_nc_file, engine=reng) as _xds:
+    for _i, _nc_file in enumerate(filelist):
+        _xds = ds_list[_i]
+        print(f"{_i}, {round(_i/len(filelist), 5)*100}".ljust(40), end="\r")
+        if 1 == 1:
             for _v in var_list:
                 _src = _xds[_v]
                 _aff2 = _src.rio.transform()
@@ -78,6 +83,8 @@ def get_forcing_dict(
                 # gdf3 = pd.concat([gpkg_divides, _df_zonal_stats], axis=1)
                 df_dict[_v][_xds.time.values[0]] = _df_zonal_stats[sum_stat]
 
+    [_xds.close() for _xds in ds_list]
+
     return df_dict
 
 

From 180a81720daa2b237126436fc2df66ce15bda744 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Wed, 8 Feb 2023 16:21:37 -0600
Subject: [PATCH 10/11] add notebook with 240hour forcing buildup

---
 .../VERYROUGH_RTI_Forcing_example.ipynb       | 1021 +++++++++++++++++
 1 file changed, 1021 insertions(+)
 create mode 100644 ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb

diff --git a/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb b/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb
new file mode 100644
index 0000000..2ff823e
--- /dev/null
+++ b/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb
@@ -0,0 +1,1021 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "id": "c4b19730-d455-418f-a3a3-15efec3a8387",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: geoparquet in /Users/jshalgren/pyenv310/lib/python3.10/site-packages (0.0.3)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "# !pip install config\n",
+    "!pip install geoparquet \n",
+    "import geoparquet as gpq\n",
+    "import pickle\n",
+    "# import config\n",
+    "import os\n",
+    "import gc\n",
+    "\n",
+    "import geopandas as gpd\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import xarray as xr\n",
+    "import rasterio as rio\n",
+    "\n",
+    "from rasterio.io import MemoryFile\n",
+    "\n",
+    "from rasterio.features import rasterize\n",
+    "# from utils import parquet_to_gdf\n",
+    "# from Grid_to_parquet import get_dataset\n",
+    "\n",
+    "TEMPLATE_BLOB_NAME = \"nwm.20221001/forcing_medium_range/nwm.t00z.medium_range.forcing.f001.conus.nc\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "id": "e9159453-76c6-4bbe-b08f-a746a54f48f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NWM_BUCKET = \"national-water-model\"\n",
+    "\n",
+    "# WKT strings extracted from NWM grids\n",
+    "CONUS_NWM_WKT = 'PROJCS[\"Lambert_Conformal_Conic\",GEOGCS[\"GCS_Sphere\",DATUM[\"D_Sphere\",SPHEROID[\"Sphere\",6370000.0,0.0]], \\\n",
+    "PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"false_easting\",0.0],\\\n",
+    "PARAMETER[\"false_northing\",0.0],PARAMETER[\"central_meridian\",-97.0],PARAMETER[\"standard_parallel_1\",30.0],\\\n",
+    "PARAMETER[\"standard_parallel_2\",60.0],PARAMETER[\"latitude_of_origin\",40.0],UNIT[\"Meter\",1.0]]'\n",
+    "\n",
+    "HI_NWM_WKT = 'PROJCS[\"Lambert_Conformal_Conic\",GEOGCS[\"GCS_Sphere\",DATUM[\"D_Sphere\",SPHEROID[\"Sphere\",6370000.0,0.0]],\\\n",
+    "PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"false_easting\",0.0],\\\n",
+    "PARAMETER[\"false_northing\",0.0],PARAMETER[\"central_meridian\",-157.42],PARAMETER[\"standard_parallel_1\",10.0],\\\n",
+    "PARAMETER[\"standard_parallel_2\",30.0],PARAMETER[\"latitude_of_origin\",20.6],UNIT[\"Meter\",1.0]]'\n",
+    "\n",
+    "PR_NWM_WKT = 'PROJCS[\"Sphere_Lambert_Conformal_Conic\",GEOGCS[\"GCS_Sphere\",DATUM[\"D_Sphere\",SPHEROID[\"Sphere\",6370000.0,0.0]],\\\n",
+    "PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]],PROJECTION[\"Lambert_Conformal_Conic_2SP\"],PARAMETER[\"false_easting\",0.0],\\\n",
+    "PARAMETER[\"false_northing\",0.0],PARAMETER[\"central_meridian\",-65.91],PARAMETER[\"standard_parallel_1\",18.1],\\\n",
+    "PARAMETER[\"standard_parallel_2\",18.1],PARAMETER[\"latitude_of_origin\",18.1],UNIT[\"Meter\",1.0]]'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "eaba8b05-2b98-4aea-974f-d26de67f729b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "\n",
+    "CACHE_DIR = Path(\".\", \"data_3\", \"cache\")\n",
+    "NWM_CACHE_DIR = os.path.join(CACHE_DIR, \"nwm\")\n",
+    "USGS_CACHE_DIR = os.path.join(CACHE_DIR, \"usgs\")\n",
+    "GEO_CACHE_DIR = os.path.join(CACHE_DIR, \"geo\")\n",
+    "\n",
+    "NWM_CACHE_H5 = os.path.join(NWM_CACHE_DIR, \"gcp_client.h5\")\n",
+    "\n",
+    "PARQUET_CACHE_DIR = os.path.join(CACHE_DIR, \"parquet\")\n",
+    "MEDIUM_RANGE_FORCING_PARQUET = os.path.join(PARQUET_CACHE_DIR, \"forcing_medium_range\")\n",
+    "FORCING_ANALYSIS_ASSIM_PARQUET = os.path.join(PARQUET_CACHE_DIR, \"forcing_analysis_assim\")\n",
+    "MEDIUM_RANGE_PARQUET = os.path.join(PARQUET_CACHE_DIR, \"medium_range\")\n",
+    "USGS_PARQUET = os.path.join(PARQUET_CACHE_DIR, \"usgs\")\n",
+    "\n",
+    "HUC10_SHP_FILEPATH = os.path.join(GEO_CACHE_DIR, \"wbdhu10_conus.shp\")\n",
+    "HUC10_PARQUET_FILEPATH = os.path.join(GEO_CACHE_DIR, \"wbdhu10_conus.parquet\")\n",
+    "HUC10_MEDIUM_RANGE_WEIGHTS_FILEPATH = os.path.join(GEO_CACHE_DIR, \"wbdhu10_medium_range_weights.pkl\")\n",
+    "\n",
+    "ROUTE_LINK_FILE = os.path.join(NWM_CACHE_DIR, \"RouteLink_CONUS.nc\")\n",
+    "ROUTE_LINK_PARQUET = os.path.join(NWM_CACHE_DIR, \"route_link_conus.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "82731a4e-70c8-4cc8-a6e1-2de5ff14a947",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parquet_to_gdf(parquet_filepath: str) -> gpd.GeoDataFrame:\n",
+    "    gdf = gpd.read_parquet(parquet_filepath)\n",
+    "    return gdf\n",
+    "\n",
+    "def get_cache_dir(create: bool = True):\n",
+    "    if not os.path.exists(NWM_CACHE_DIR) and create:\n",
+    "        os.mkdir(NWM_CACHE_DIR)\n",
+    "    if not os.path.exists(NWM_CACHE_DIR):\n",
+    "        raise NotADirectoryError\n",
+    "    return NWM_CACHE_DIR\n",
+    "\n",
+    "\n",
+    "def make_parent_dir(filepath):\n",
+    "    Path(filepath).parent.mkdir(parents=True, exist_ok=True)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "74895a20-a42e-48ff-bff8-313a0ae26d14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_dataset(\n",
+    "        blob_name: str,\n",
+    "        use_cache: bool = True\n",
+    ") -> xr.Dataset:\n",
+    "    \"\"\"Retrieve a blob from the data service as xarray.Dataset.\n",
+    "    Based largely on OWP HydroTools.\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    blob_name: str, required\n",
+    "        Name of blob to retrieve.\n",
+    "    use_cacahe: bool, default True\n",
+    "        If cache should be used.  \n",
+    "        If True, checks to see if file is in cache, and \n",
+    "        if fetched from remote will save to cache.\n",
+    "    Returns\n",
+    "    -------\n",
+    "    ds : xarray.Dataset\n",
+    "        The data stored in the blob.\n",
+    "    \"\"\"\n",
+    "    nc_filepath = os.path.join(get_cache_dir(), blob_name)\n",
+    "    make_parent_dir(nc_filepath)\n",
+    "\n",
+    "    # If the file exists and use_cache = True\n",
+    "    if os.path.exists(nc_filepath) and use_cache:\n",
+    "        # Get dataset from cache\n",
+    "        ds = xr.load_dataset(\n",
+    "            nc_filepath,\n",
+    "            engine='h5netcdf',\n",
+    "        )\n",
+    "        return ds\n",
+    "    else:\n",
+    "        # Get raw bytes\n",
+    "        raw_bytes = get_blob(blob_name)\n",
+    "        # Create Dataset\n",
+    "        ds = xr.load_dataset(\n",
+    "            MemoryFile(raw_bytes),\n",
+    "            engine='h5netcdf',\n",
+    "        )\n",
+    "        if use_cache:\n",
+    "            # Subset and cache\n",
+    "            ds[\"RAINRATE\"].to_netcdf(\n",
+    "                nc_filepath,\n",
+    "                engine='h5netcdf',\n",
+    "            )\n",
+    "        return ds\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "b199b72a-0220-4583-ac3a-9f27c1586fe1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def generate_weights_file(\n",
+    "    gdf: gpd.GeoDataFrame,\n",
+    "    src: xr.DataArray,\n",
+    "    weights_filepath: str,\n",
+    "    crosswalk_dict_key: str, \n",
+    "):\n",
+    "    \"\"\"Generate a weights file.\"\"\"\n",
+    "    \n",
+    "    gdf_proj = gdf.to_crs(CONUS_NWM_WKT)\n",
+    "    \n",
+    "    crosswalk_dict = {}\n",
+    "    \n",
+    "    # This is a probably a really poor performing way to do this \n",
+    "    for index, row in gdf_proj.iterrows():\n",
+    "        geom_rasterize = rasterize([(row[\"geometry\"], 1)],\n",
+    "                                   out_shape=src.rio.shape,\n",
+    "                                   transform=src.rio.transform(),\n",
+    "                                   all_touched=True,\n",
+    "                                   fill=0,\n",
+    "                                   dtype='uint8')\n",
+    "        if crosswalk_dict_key:\n",
+    "            crosswalk_dict[row[crosswalk_dict_key]\n",
+    "                           ] = np.where(geom_rasterize == 1)\n",
+    "        else:\n",
+    "            crosswalk_dict[index] = np.where(geom_rasterize == 1)\n",
+    "\n",
+    "    with open(weights_filepath, 'wb') as f:\n",
+    "        pickle.dump(crosswalk_dict, f)\n",
+    "\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "dec7dd6a-483b-4401-9690-4fb213d2fc85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def add_zonalstats_to_gdf_weights(\n",
+    "    gdf: gpd.GeoDataFrame,\n",
+    "    src: xr.DataArray,\n",
+    "    weights_filepath: str,\n",
+    ") -> gpd.GeoDataFrame:\n",
+    "    \"\"\"Calculates zonal stats and adds to GeoDataFrame\"\"\"\n",
+    "\n",
+    "    df = calc_zonal_stats_weights(src, weights_filepath)\n",
+    "    gdf_map = gdf.merge(df, left_on=\"huc10\", right_on=\"catchment_id\")\n",
+    "\n",
+    "    return gdf_map"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "cad942dd-29ec-4ed8-be6d-e5cc13dde0ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from google.cloud import storage\n",
+    "def get_blob(\n",
+    "    blob_name: str,\n",
+    "    bucket: str = NWM_BUCKET\n",
+    ") -> bytes:\n",
+    "    \"\"\"Retrieve a blob from the data service as bytes.\n",
+    "    Based largely on OWP HydroTools.\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    blob_name : str, required\n",
+    "        Name of blob to retrieve.\n",
+    "    Returns\n",
+    "    -------\n",
+    "    data : bytes\n",
+    "        The data stored in the blob.\n",
+    "    \"\"\"\n",
+    "    # Setup anonymous client and retrieve blob data\n",
+    "    client = storage.Client.create_anonymous_client()\n",
+    "    bucket = client.bucket(bucket)\n",
+    "    return bucket.blob(blob_name).download_as_bytes(timeout=120)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "bb25a27c-fd98-478c-97ef-c53dbd73a6f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 305 ms, sys: 46.5 ms, total: 351 ms\n",
+      "Wall time: 356 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "### MAIN\n",
+    "ds = get_dataset(TEMPLATE_BLOB_NAME, use_cache=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "e8e7f87d-ae19-454e-b958-2668e15d5c10",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 1.56 s, sys: 52.4 ms, total: 1.62 s\n",
+      "Wall time: 1.65 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "polygonfile = gpd.read_file(\"03w/nextgen_03W.gpkg\",layer=\"divides\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "95f6ad0a-ec08-42aa-a9a9-4694dd7ad1e0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 171 ms, sys: 21.4 ms, total: 193 ms\n",
+      "Wall time: 209 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "polygonfile.to_parquet(\"03w/ng_03W.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "d540ca78-43d5-4387-bd77-c6d8848ca576",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 271 ms, sys: 53.4 ms, total: 324 ms\n",
+      "Wall time: 340 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "gdf = parquet_to_gdf(\"03w/ng_03W.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "fb1dc54e-89ff-4d91-b73e-350da31e3ebb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 27min 19s, sys: 1min 20s, total: 28min 39s\n",
+      "Wall time: 36min 34s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "src = ds[\"RAINRATE\"]\n",
+    "generate_weights_file(gdf, src, \"data_3/weights.pkl\", crosswalk_dict_key=\"id\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "id": "d2101a68-2365-41db-a226-e70f661ed57a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install ipdb\n",
+    "def calc_zonal_stats_weights(\n",
+    "    src: xr.DataArray,\n",
+    "    weights_filepath: str,\n",
+    ") -> pd.DataFrame:\n",
+    "    \"\"\"Calculates zonal stats\"\"\"\n",
+    "\n",
+    "    # Open weights dict from pickle\n",
+    "    # This could probably be done once and passed as a reference.\n",
+    "    with open(weights_filepath, 'rb') as f:\n",
+    "        crosswalk_dict = pickle.load(f)\n",
+    "\n",
+    "    r_array = src.values[0]\n",
+    "    r_array[r_array == src.rio.nodata] = np.nan\n",
+    "\n",
+    "    mean_dict = {}\n",
+    "    for key, value in crosswalk_dict.items():\n",
+    "        mean_dict[key] = np.nanmean(r_array[value])\n",
+    "\n",
+    "    df = pd.DataFrame.from_dict(mean_dict,\n",
+    "                                orient='index',\n",
+    "                                columns=['value'])\n",
+    "\n",
+    "    df.reset_index(inplace=True, names=\"catchment_id\")\n",
+    "\n",
+    "    # This should not be needed, but without memory usage grows\n",
+    "    del crosswalk_dict\n",
+    "    del f\n",
+    "    gc.collect()\n",
+    "\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "id": "cc543b89-9335-4a30-bdae-83c30293d6dd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 752 ms, sys: 13.8 ms, total: 765 ms\n",
+      "Wall time: 767 ms\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>catchment_id</th>\n",
+       "      <th>value</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>cat-107068</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>cat-107233</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>cat-107683</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>cat-108167</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>cat-112977</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30867</th>\n",
+       "      <td>cat-137554</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30868</th>\n",
+       "      <td>cat-106885</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30869</th>\n",
+       "      <td>cat-137071</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30870</th>\n",
+       "      <td>cat-137068</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30871</th>\n",
+       "      <td>cat-137748</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>30872 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      catchment_id  value\n",
+       "0       cat-107068    0.0\n",
+       "1       cat-107233    0.0\n",
+       "2       cat-107683    0.0\n",
+       "3       cat-108167    0.0\n",
+       "4       cat-112977    0.0\n",
+       "...            ...    ...\n",
+       "30867   cat-137554    0.0\n",
+       "30868   cat-106885    0.0\n",
+       "30869   cat-137071    0.0\n",
+       "30870   cat-137068    0.0\n",
+       "30871   cat-137748    0.0\n",
+       "\n",
+       "[30872 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 135,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "calc_zonal_stats_weights(src, \"data_3/weights.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "id": "fa8c71d3-2fd1-46eb-8fee-d3f0ddb00929",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_forcing_dict_RTIway(\n",
+    "    pickle_file, # This would be a Feature list for parallel calling -- \n",
+    "    # if there is a stored weights file, we use it \n",
+    "    # (checking for an optional flag to force re-creation of the weights...)\n",
+    "    folder_prefix,\n",
+    "    file_list,\n",
+    "):\n",
+    "\n",
+    "    var = \"RAINRATE\"\n",
+    "    reng = \"rasterio\"\n",
+    "    filehandles = [xr.open_dataset(folder_prefix / f, engine=reng)[var] for f in file_list]\n",
+    "    # filehandles = [get_dataset(\"data/\" + f, use_cache=True) for f in file_list]\n",
+    "    stats = []\n",
+    "\n",
+    "    for _i, f in enumerate(filehandles):\n",
+    "        print(f\"{_i}, {round(_i/len(file_list), 2)*100}\".ljust(40), end=\"\\r\")\n",
+    "        stats.append(calc_zonal_stats_weights(f, pickle_file))\n",
+    "\n",
+    "    [f.close() for f in filehandles]\n",
+    "    return stats\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 174,
+   "id": "129e1681-d89c-4636-a487-0432fe7e283c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def get_forcing_dict_RTIway2(\n",
+    "    pickle_file, # This would be a Feature list for parallel calling -- \n",
+    "    # if there is a stored weights file, we use it \n",
+    "    # (checking for an optional flag to force re-creation of the weights...)\n",
+    "    gpkg_divides,\n",
+    "    folder_prefix,\n",
+    "    filelist,\n",
+    "    var_list,\n",
+    "):\n",
+    "    reng = \"rasterio\"\n",
+    "    pick_val = \"value\"\n",
+    "\n",
+    "    df_dict = {}\n",
+    "    for _v in var_list:\n",
+    "        df_dict[_v] = pd.DataFrame(index=gpkg_divides.index)\n",
+    "\n",
+    "    ds_list = []\n",
+    "    for _i, _nc_file in enumerate(filelist):\n",
+    "        # _nc_file = (\"nwm.t00z.medium_range.forcing.f001.conus.nc\")\n",
+    "        _full_nc_file = folder_prefix.joinpath(_nc_file)\n",
+    "\n",
+    "        with xr.open_dataset(_full_nc_file, engine=reng) as _xds:\n",
+    "        #_xds = ds_list[_i]\n",
+    "            print(f\"{_i}, {round(_i/len(filelist), 5)*100}\".ljust(40), end=\"\\r\")\n",
+    "            for _v in var_list:\n",
+    "                _src = _xds[_v]\n",
+    "\n",
+    "                # import ipdb; ipdb.set_trace()\n",
+    "                _df_zonal_stats = (calc_zonal_stats_weights(_src, pickle_file))\n",
+    "                # if adding statistics back to original GeoDataFrame\n",
+    "                # gdf3 = pd.concat([gpkg_divides, _df_zonal_stats], axis=1)\n",
+    "                df_dict[_v][_xds.time.values[0]] = _df_zonal_stats[pick_val]\n",
+    "\n",
+    "    [_xds.close() for _xds in ds_list]\n",
+    "\n",
+    "    return df_dict\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8459aaa9-11ef-40d7-bced-1217ab59a2f5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Working on the new way\n",
+      "44, 18.333                              \r"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "import time\n",
+    "folder_prefix = Path(\"data\")\n",
+    "list_of_files = [\n",
+    "    f\"nwm.t12z.medium_range.forcing.f{_r:03}.conus.nc\" for _r in range(1, 241)\n",
+    "]\n",
+    "\n",
+    "f_03 = \"03w/nextgen_03W.gpkg\"\n",
+    "gpkg_divides = gpd.read_file(f_03, layer=\"divides\")\n",
+    "var_list = [\n",
+    "    \"U2D\",\n",
+    "    \"V2D\",\n",
+    "    \"LWDOWN\",\n",
+    "    \"RAINRATE\",\n",
+    "    \"T2D\",\n",
+    "    \"Q2D\",\n",
+    "    \"PSFC\",\n",
+    "    \"SWDOWN\",\n",
+    "]\n",
+    "\n",
+    "\n",
+    "# file_list = list_of_files[0:18]\n",
+    "file_list = list_of_files[0:3]\n",
+    "file_list = list_of_files[0:]\n",
+    "\n",
+    "pickle_file = \"data_3/weights.pkl\"\n",
+    "\n",
+    "start_time = time.time()\n",
+    "print(f\"Working on the new way\")\n",
+    "fd2 = get_forcing_dict_RTIway2(\n",
+    "    pickle_file,\n",
+    "    gpkg_divides,\n",
+    "    folder_prefix,\n",
+    "    file_list,\n",
+    "    var_list,\n",
+    ")\n",
+    "print(time.time() - start_time)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 176,
+   "id": "358d9a99-2016-4c51-b65c-e9cd92fce358",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'U2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0                 0.093212            -0.436970            -0.097939\n",
+       " 1                -0.008313            -0.587000            -0.435375\n",
+       " 2                -0.442512            -0.937268            -0.507317\n",
+       " 3                -0.178103            -0.600897            -0.614621\n",
+       " 4                -0.694703            -1.009649            -1.231270\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867            -1.230500            -1.497500            -1.447500\n",
+       " 30868            -0.728547            -0.795235            -0.682782\n",
+       " 30869             0.348000             0.298625             0.092250\n",
+       " 30870             0.331000             0.297000             0.093000\n",
+       " 30871            -0.856077            -1.326692            -1.195231\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'V2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0                -2.467212            -2.618788            -2.710545\n",
+       " 1                -1.119000            -2.423063            -2.262000\n",
+       " 2                -1.036951            -1.427024            -1.693463\n",
+       " 3                -1.777345            -2.729759            -2.693241\n",
+       " 4                -1.849676            -1.769676            -1.616486\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867            -0.698500            -0.684000            -0.593500\n",
+       " 30868            -2.663799            -2.709721            -2.727983\n",
+       " 30869            -2.689000            -3.307000            -3.045625\n",
+       " 30870            -2.702000            -3.304000            -3.041000\n",
+       " 30871            -0.444692            -0.183462            -0.355769\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'LWDOWN':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0               322.563606           316.459333           319.125182\n",
+       " 1               392.175313           395.426188           398.251188\n",
+       " 2               398.620732           394.783537           409.061756\n",
+       " 3               391.672207           382.839034           398.388034\n",
+       " 4               343.990892           347.926405           352.131730\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867           319.904000           331.289000           341.212000\n",
+       " 30868           311.615291           313.601397           313.269972\n",
+       " 30869           328.200500           334.166375           379.226875\n",
+       " 30870           328.220000           334.226000           379.722000\n",
+       " 30871           353.785769           324.706538           329.218692\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'RAINRATE':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0             0.000000e+00         0.000000e+00         0.000000e+00\n",
+       " 1             5.745148e-07         2.020275e-07         1.694254e-06\n",
+       " 2             3.051579e-07         2.494036e-08         5.281748e-08\n",
+       " 3             0.000000e+00         0.000000e+00         1.281399e-08\n",
+       " 4             0.000000e+00         0.000000e+00         0.000000e+00\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867         0.000000e+00         0.000000e+00         0.000000e+00\n",
+       " 30868         0.000000e+00         0.000000e+00         0.000000e+00\n",
+       " 30869         0.000000e+00         8.736407e-07         4.344080e-07\n",
+       " 30870         0.000000e+00         8.062991e-07         4.031495e-07\n",
+       " 30871         0.000000e+00         0.000000e+00         0.000000e+00\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'T2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0               285.556061           287.930303           289.624848\n",
+       " 1               287.690625           290.366875           289.825000\n",
+       " 2               288.297805           290.214146           291.377317\n",
+       " 3               287.798621           290.006897           290.491724\n",
+       " 4               289.404054           291.912432           293.802703\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867           285.505000           287.790000           290.055000\n",
+       " 30868           286.481676           288.548603           290.101508\n",
+       " 30869           287.532500           288.567500           287.537500\n",
+       " 30870           287.550000           288.570000           287.550000\n",
+       " 30871           286.033846           288.870000           291.253077\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'Q2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0                 0.008322             0.007559             0.006950\n",
+       " 1                 0.009809             0.009331             0.009382\n",
+       " 2                 0.010486             0.010341             0.010183\n",
+       " 3                 0.009420             0.009144             0.008930\n",
+       " 4                 0.010069             0.009712             0.009377\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867             0.008171             0.007912             0.007719\n",
+       " 30868             0.006779             0.006366             0.006304\n",
+       " 30869             0.008371             0.007918             0.007822\n",
+       " 30870             0.008359             0.007912             0.007823\n",
+       " 30871             0.007981             0.007854             0.007728\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'PSFC':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0             97608.357576         97617.875758         97623.369697\n",
+       " 1             98797.675000         98810.750000         98835.112500\n",
+       " 2             98744.995122         98773.275610         98794.758537\n",
+       " 3             98500.610345         98523.958621         98538.641379\n",
+       " 4             98549.475676         98560.543243         98548.802703\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867         98681.500000         98687.650000         98663.800000\n",
+       " 30868         97187.253073         97204.259218         97203.415642\n",
+       " 30869         98678.750000         98714.250000         98733.575000\n",
+       " 30870         98726.200000         98761.900000         98781.400000\n",
+       " 30871         98472.423077         98459.492308         98439.884615\n",
+       " \n",
+       " [30872 rows x 3 columns],\n",
+       " 'SWDOWN':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
+       " 0               298.820667           499.120333           643.196424\n",
+       " 1                33.838313           256.588375            88.550188\n",
+       " 2                24.711732           200.719366           232.752341\n",
+       " 3                44.000414           316.280138           241.046621\n",
+       " 4               262.306459           440.323297           593.502919\n",
+       " ...                    ...                  ...                  ...\n",
+       " 30867           263.077500           409.978000           538.065500\n",
+       " 30868           322.893899           488.363173           644.919229\n",
+       " 30869           297.890750           435.522875           208.013250\n",
+       " 30870           296.160000           432.068000           204.971000\n",
+       " 30871           184.605154           470.781846           616.673769\n",
+       " \n",
+       " [30872 rows x 3 columns]}"
+      ]
+     },
+     "execution_count": 176,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fd2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d37d2590-1de1-4e9c-85f2-9c7a1172f07c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%time\n",
+    "# if 1 == 1:\n",
+    "#     _f = folder_prefix / list_of_files[0]\n",
+    "#     src = xr.open_dataset(_f)[\"RAINRATE\"]\n",
+    "#     generate_weights_file(gdf, src, \"data_3/weights_alt.pkl\", crosswalk_dict_key=\"id\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "id": "ea5887b5-b637-4642-bd47-ade6118e6532",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "240"
+      ]
+     },
+     "execution_count": 149,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(fd2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "id": "61e4d101-7973-4e64-8619-4ca6ab9a0a18",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>catchment_id</th>\n",
+       "      <th>value</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>cat-107068</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>cat-107233</td>\n",
+       "      <td>5.745148e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>cat-107683</td>\n",
+       "      <td>3.051579e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>cat-108167</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>cat-112977</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30867</th>\n",
+       "      <td>cat-137554</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30868</th>\n",
+       "      <td>cat-106885</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30869</th>\n",
+       "      <td>cat-137071</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30870</th>\n",
+       "      <td>cat-137068</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30871</th>\n",
+       "      <td>cat-137748</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>30872 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      catchment_id         value\n",
+       "0       cat-107068  0.000000e+00\n",
+       "1       cat-107233  5.745148e-07\n",
+       "2       cat-107683  3.051579e-07\n",
+       "3       cat-108167  0.000000e+00\n",
+       "4       cat-112977  0.000000e+00\n",
+       "...            ...           ...\n",
+       "30867   cat-137554  0.000000e+00\n",
+       "30868   cat-106885  0.000000e+00\n",
+       "30869   cat-137071  0.000000e+00\n",
+       "30870   cat-137068  0.000000e+00\n",
+       "30871   cat-137748  0.000000e+00\n",
+       "\n",
+       "[30872 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 151,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fd2[0]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3885ed0-8b83-46af-afc9-75639ff8b7c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "pcp_var = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"APCP_surface\")\n",
+    "lw_var = pd.read_csv(\"LWDOWN.csv\", index_col=0)#.rename(\"DLWRF_surface\")\n",
+    "sw_var = pd.read_csv(\"SWDOWN.csv\", index_col=0)#.rename(\"DSWRF_surface\")\n",
+    "sp_var = pd.read_csv(\"PSFC.csv\", index_col=0)#.rename(\"SPFH_2maboveground\")\n",
+    "tmp_var = pd.read_csv(\"T2D.csv\", index_col=0)#.rename(\"TMP_2maboveground\")\n",
+    "u2d_var = pd.read_csv(\"U2D.csv\", index_col=0)#.rename(\"UGRD_10maboveground\")\n",
+    "v2d_var = pd.read_csv(\"V2D.csv\", index_col=0)#.rename(\"VGRD_10maboveground\")\n",
+    "pcp_var2 = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"precip_rate\") ##BROKEN!!\n",
+    "\n",
+    "for _i in range(0,40000):\n",
+    "    #_i = 0\n",
+    "    try:\n",
+    "        pcp_var_0 = pcp_var.transpose()[_i].rename(\"APCP_surface\")\n",
+    "        lw_var_0 = lw_var.transpose()[_i].rename(\"DLWRF_surface\")\n",
+    "        sw_var_0 = sw_var.transpose()[_i].rename(\"DSWRF_surface\")\n",
+    "        sp_var_0 = sp_var.transpose()[_i].rename(\"SPFH_2maboveground\")\n",
+    "        tmp_var_0 = tmp_var.transpose()[_i].rename(\"TMP_2maboveground\")\n",
+    "        u2d_var_0 = u2d_var.transpose()[_i].rename(\"UGRD_10maboveground\")\n",
+    "        v2d_var_0 = v2d_var.transpose()[_i].rename(\"VGRD_10maboveground\")\n",
+    "        pcp_var2_0 = pcp_var2.transpose()[_i].rename(\"precip_rate\") ##BROKEN!!\n",
+    "\n",
+    "        d = pd.concat([pcp_var_0, lw_var_0, sw_var_0, sp_var_0, tmp_var_0, u2d_var_0, v2d_var_0, pcp_var2_0], axis=1)\n",
+    "        d.index.name = \"time\"\n",
+    "\n",
+    "\n",
+    "        d.to_csv(f\"input_data/cat03w_{_i:07}.csv\")\n",
+    "    except:\n",
+    "        print(f\"no data for watershed {_i}\", end=\"\\t\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From eea2b8648a1be977d0e08d58d34161503e7cc8c5 Mon Sep 17 00:00:00 2001
From: James Halgren <jshalgren@ua.edu>
Date: Fri, 10 Feb 2023 08:50:47 -0600
Subject: [PATCH 11/11] complete workflow

---
 .../VERYROUGH_RTI_Forcing_example.ipynb       | 539 +++---------------
 1 file changed, 68 insertions(+), 471 deletions(-)

diff --git a/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb b/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb
index 2ff823e..acba744 100644
--- a/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb
+++ b/ngen_forcing/VERYROUGH_RTI_Forcing_example.ipynb
@@ -2,21 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": null,
    "id": "c4b19730-d455-418f-a3a3-15efec3a8387",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Requirement already satisfied: geoparquet in /Users/jshalgren/pyenv310/lib/python3.10/site-packages (0.0.3)\n",
-      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# !pip install config\n",
     "!pip install geoparquet \n",
@@ -43,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": null,
    "id": "e9159453-76c6-4bbe-b08f-a746a54f48f5",
    "metadata": {},
    "outputs": [],
@@ -69,7 +58,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "id": "eaba8b05-2b98-4aea-974f-d26de67f729b",
    "metadata": {},
    "outputs": [],
@@ -101,7 +90,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "id": "82731a4e-70c8-4cc8-a6e1-2de5ff14a947",
    "metadata": {},
    "outputs": [],
@@ -125,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "id": "74895a20-a42e-48ff-bff8-313a0ae26d14",
    "metadata": {},
    "outputs": [],
@@ -181,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": null,
    "id": "b199b72a-0220-4583-ac3a-9f27c1586fe1",
    "metadata": {},
    "outputs": [],
@@ -221,7 +210,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": null,
    "id": "dec7dd6a-483b-4401-9690-4fb213d2fc85",
    "metadata": {},
    "outputs": [],
@@ -241,7 +230,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": null,
    "id": "cad942dd-29ec-4ed8-be6d-e5cc13dde0ef",
    "metadata": {},
    "outputs": [],
@@ -271,19 +260,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": null,
    "id": "bb25a27c-fd98-478c-97ef-c53dbd73a6f1",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 305 ms, sys: 46.5 ms, total: 351 ms\n",
-      "Wall time: 356 ms\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "### MAIN\n",
@@ -292,19 +272,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": null,
    "id": "e8e7f87d-ae19-454e-b958-2668e15d5c10",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 1.56 s, sys: 52.4 ms, total: 1.62 s\n",
-      "Wall time: 1.65 s\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "polygonfile = gpd.read_file(\"03w/nextgen_03W.gpkg\",layer=\"divides\")"
@@ -312,19 +283,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": null,
    "id": "95f6ad0a-ec08-42aa-a9a9-4694dd7ad1e0",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 171 ms, sys: 21.4 ms, total: 193 ms\n",
-      "Wall time: 209 ms\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "polygonfile.to_parquet(\"03w/ng_03W.parquet\")"
@@ -332,19 +294,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": null,
    "id": "d540ca78-43d5-4387-bd77-c6d8848ca576",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 271 ms, sys: 53.4 ms, total: 324 ms\n",
-      "Wall time: 340 ms\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "gdf = parquet_to_gdf(\"03w/ng_03W.parquet\")"
@@ -352,19 +305,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": null,
    "id": "fb1dc54e-89ff-4d91-b73e-350da31e3ebb",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 27min 19s, sys: 1min 20s, total: 28min 39s\n",
-      "Wall time: 36min 34s\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "src = ds[\"RAINRATE\"]\n",
@@ -373,7 +317,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 134,
+   "execution_count": null,
    "id": "d2101a68-2365-41db-a226-e70f661ed57a",
    "metadata": {},
    "outputs": [],
@@ -413,126 +357,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 135,
+   "execution_count": null,
    "id": "cc543b89-9335-4a30-bdae-83c30293d6dd",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 752 ms, sys: 13.8 ms, total: 765 ms\n",
-      "Wall time: 767 ms\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>catchment_id</th>\n",
-       "      <th>value</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>cat-107068</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>cat-107233</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>cat-107683</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>cat-108167</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>cat-112977</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30867</th>\n",
-       "      <td>cat-137554</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30868</th>\n",
-       "      <td>cat-106885</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30869</th>\n",
-       "      <td>cat-137071</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30870</th>\n",
-       "      <td>cat-137068</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30871</th>\n",
-       "      <td>cat-137748</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>30872 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      catchment_id  value\n",
-       "0       cat-107068    0.0\n",
-       "1       cat-107233    0.0\n",
-       "2       cat-107683    0.0\n",
-       "3       cat-108167    0.0\n",
-       "4       cat-112977    0.0\n",
-       "...            ...    ...\n",
-       "30867   cat-137554    0.0\n",
-       "30868   cat-106885    0.0\n",
-       "30869   cat-137071    0.0\n",
-       "30870   cat-137068    0.0\n",
-       "30871   cat-137748    0.0\n",
-       "\n",
-       "[30872 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 135,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "calc_zonal_stats_weights(src, \"data_3/weights.pkl\")"
@@ -540,7 +368,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 140,
+   "execution_count": null,
    "id": "fa8c71d3-2fd1-46eb-8fee-d3f0ddb00929",
    "metadata": {},
    "outputs": [],
@@ -570,7 +398,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 174,
+   "execution_count": null,
    "id": "129e1681-d89c-4636-a487-0432fe7e283c",
    "metadata": {},
    "outputs": [],
@@ -620,16 +448,7 @@
    "execution_count": null,
    "id": "8459aaa9-11ef-40d7-bced-1217ab59a2f5",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Working on the new way\n",
-      "44, 18.333                              \r"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "import time\n",
@@ -671,138 +490,6 @@
     "\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 176,
-   "id": "358d9a99-2016-4c51-b65c-e9cd92fce358",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'U2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0                 0.093212            -0.436970            -0.097939\n",
-       " 1                -0.008313            -0.587000            -0.435375\n",
-       " 2                -0.442512            -0.937268            -0.507317\n",
-       " 3                -0.178103            -0.600897            -0.614621\n",
-       " 4                -0.694703            -1.009649            -1.231270\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867            -1.230500            -1.497500            -1.447500\n",
-       " 30868            -0.728547            -0.795235            -0.682782\n",
-       " 30869             0.348000             0.298625             0.092250\n",
-       " 30870             0.331000             0.297000             0.093000\n",
-       " 30871            -0.856077            -1.326692            -1.195231\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'V2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0                -2.467212            -2.618788            -2.710545\n",
-       " 1                -1.119000            -2.423063            -2.262000\n",
-       " 2                -1.036951            -1.427024            -1.693463\n",
-       " 3                -1.777345            -2.729759            -2.693241\n",
-       " 4                -1.849676            -1.769676            -1.616486\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867            -0.698500            -0.684000            -0.593500\n",
-       " 30868            -2.663799            -2.709721            -2.727983\n",
-       " 30869            -2.689000            -3.307000            -3.045625\n",
-       " 30870            -2.702000            -3.304000            -3.041000\n",
-       " 30871            -0.444692            -0.183462            -0.355769\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'LWDOWN':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0               322.563606           316.459333           319.125182\n",
-       " 1               392.175313           395.426188           398.251188\n",
-       " 2               398.620732           394.783537           409.061756\n",
-       " 3               391.672207           382.839034           398.388034\n",
-       " 4               343.990892           347.926405           352.131730\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867           319.904000           331.289000           341.212000\n",
-       " 30868           311.615291           313.601397           313.269972\n",
-       " 30869           328.200500           334.166375           379.226875\n",
-       " 30870           328.220000           334.226000           379.722000\n",
-       " 30871           353.785769           324.706538           329.218692\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'RAINRATE':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0             0.000000e+00         0.000000e+00         0.000000e+00\n",
-       " 1             5.745148e-07         2.020275e-07         1.694254e-06\n",
-       " 2             3.051579e-07         2.494036e-08         5.281748e-08\n",
-       " 3             0.000000e+00         0.000000e+00         1.281399e-08\n",
-       " 4             0.000000e+00         0.000000e+00         0.000000e+00\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867         0.000000e+00         0.000000e+00         0.000000e+00\n",
-       " 30868         0.000000e+00         0.000000e+00         0.000000e+00\n",
-       " 30869         0.000000e+00         8.736407e-07         4.344080e-07\n",
-       " 30870         0.000000e+00         8.062991e-07         4.031495e-07\n",
-       " 30871         0.000000e+00         0.000000e+00         0.000000e+00\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'T2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0               285.556061           287.930303           289.624848\n",
-       " 1               287.690625           290.366875           289.825000\n",
-       " 2               288.297805           290.214146           291.377317\n",
-       " 3               287.798621           290.006897           290.491724\n",
-       " 4               289.404054           291.912432           293.802703\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867           285.505000           287.790000           290.055000\n",
-       " 30868           286.481676           288.548603           290.101508\n",
-       " 30869           287.532500           288.567500           287.537500\n",
-       " 30870           287.550000           288.570000           287.550000\n",
-       " 30871           286.033846           288.870000           291.253077\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'Q2D':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0                 0.008322             0.007559             0.006950\n",
-       " 1                 0.009809             0.009331             0.009382\n",
-       " 2                 0.010486             0.010341             0.010183\n",
-       " 3                 0.009420             0.009144             0.008930\n",
-       " 4                 0.010069             0.009712             0.009377\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867             0.008171             0.007912             0.007719\n",
-       " 30868             0.006779             0.006366             0.006304\n",
-       " 30869             0.008371             0.007918             0.007822\n",
-       " 30870             0.008359             0.007912             0.007823\n",
-       " 30871             0.007981             0.007854             0.007728\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'PSFC':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0             97608.357576         97617.875758         97623.369697\n",
-       " 1             98797.675000         98810.750000         98835.112500\n",
-       " 2             98744.995122         98773.275610         98794.758537\n",
-       " 3             98500.610345         98523.958621         98538.641379\n",
-       " 4             98549.475676         98560.543243         98548.802703\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867         98681.500000         98687.650000         98663.800000\n",
-       " 30868         97187.253073         97204.259218         97203.415642\n",
-       " 30869         98678.750000         98714.250000         98733.575000\n",
-       " 30870         98726.200000         98761.900000         98781.400000\n",
-       " 30871         98472.423077         98459.492308         98439.884615\n",
-       " \n",
-       " [30872 rows x 3 columns],\n",
-       " 'SWDOWN':        2022-08-24 13:00:00  2022-08-24 14:00:00  2022-08-24 15:00:00\n",
-       " 0               298.820667           499.120333           643.196424\n",
-       " 1                33.838313           256.588375            88.550188\n",
-       " 2                24.711732           200.719366           232.752341\n",
-       " 3                44.000414           316.280138           241.046621\n",
-       " 4               262.306459           440.323297           593.502919\n",
-       " ...                    ...                  ...                  ...\n",
-       " 30867           263.077500           409.978000           538.065500\n",
-       " 30868           322.893899           488.363173           644.919229\n",
-       " 30869           297.890750           435.522875           208.013250\n",
-       " 30870           296.160000           432.068000           204.971000\n",
-       " 30871           184.605154           470.781846           616.673769\n",
-       " \n",
-       " [30872 rows x 3 columns]}"
-      ]
-     },
-     "execution_count": 176,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "fd2"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -820,141 +507,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 149,
+   "execution_count": null,
    "id": "ea5887b5-b637-4642-bd47-ade6118e6532",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "240"
-      ]
-     },
-     "execution_count": 149,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "len(fd2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 151,
+   "execution_count": null,
    "id": "61e4d101-7973-4e64-8619-4ca6ab9a0a18",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>catchment_id</th>\n",
-       "      <th>value</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>cat-107068</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>cat-107233</td>\n",
-       "      <td>5.745148e-07</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>cat-107683</td>\n",
-       "      <td>3.051579e-07</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>cat-108167</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>cat-112977</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30867</th>\n",
-       "      <td>cat-137554</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30868</th>\n",
-       "      <td>cat-106885</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30869</th>\n",
-       "      <td>cat-137071</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30870</th>\n",
-       "      <td>cat-137068</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30871</th>\n",
-       "      <td>cat-137748</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>30872 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      catchment_id         value\n",
-       "0       cat-107068  0.000000e+00\n",
-       "1       cat-107233  5.745148e-07\n",
-       "2       cat-107683  3.051579e-07\n",
-       "3       cat-108167  0.000000e+00\n",
-       "4       cat-112977  0.000000e+00\n",
-       "...            ...           ...\n",
-       "30867   cat-137554  0.000000e+00\n",
-       "30868   cat-106885  0.000000e+00\n",
-       "30869   cat-137071  0.000000e+00\n",
-       "30870   cat-137068  0.000000e+00\n",
-       "30871   cat-137748  0.000000e+00\n",
-       "\n",
-       "[30872 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 151,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "fd2[0]\n"
+    "fd2[\"U2D\"]\n"
    ]
   },
   {
@@ -966,14 +534,22 @@
    "source": [
     "import pandas as pd\n",
     "\n",
-    "pcp_var = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"APCP_surface\")\n",
-    "lw_var = pd.read_csv(\"LWDOWN.csv\", index_col=0)#.rename(\"DLWRF_surface\")\n",
-    "sw_var = pd.read_csv(\"SWDOWN.csv\", index_col=0)#.rename(\"DSWRF_surface\")\n",
-    "sp_var = pd.read_csv(\"PSFC.csv\", index_col=0)#.rename(\"SPFH_2maboveground\")\n",
-    "tmp_var = pd.read_csv(\"T2D.csv\", index_col=0)#.rename(\"TMP_2maboveground\")\n",
-    "u2d_var = pd.read_csv(\"U2D.csv\", index_col=0)#.rename(\"UGRD_10maboveground\")\n",
-    "v2d_var = pd.read_csv(\"V2D.csv\", index_col=0)#.rename(\"VGRD_10maboveground\")\n",
-    "pcp_var2 = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"precip_rate\") ##BROKEN!!\n",
+    "# pcp_var = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"APCP_surface\")\n",
+    "# lw_var = pd.read_csv(\"LWDOWN.csv\", index_col=0)#.rename(\"DLWRF_surface\")\n",
+    "# sw_var = pd.read_csv(\"SWDOWN.csv\", index_col=0)#.rename(\"DSWRF_surface\")\n",
+    "# sp_var = pd.read_csv(\"PSFC.csv\", index_col=0)#.rename(\"SPFH_2maboveground\")\n",
+    "# tmp_var = pd.read_csv(\"T2D.csv\", index_col=0)#.rename(\"TMP_2maboveground\")\n",
+    "# u2d_var = pd.read_csv(\"U2D.csv\", index_col=0)#.rename(\"UGRD_10maboveground\")\n",
+    "# v2d_var = pd.read_csv(\"V2D.csv\", index_col=0)#.rename(\"VGRD_10maboveground\")\n",
+    "# pcp_var2 = pd.read_csv(\"RAINRATE.csv\", index_col=0)#.rename(\"precip_rate\") ##BROKEN!!\n",
+    "pcp_var = fd2[\"RAINRATE\"] \n",
+    "lw_var = fd2[\"LWDOWN\"]\n",
+    "sw_var = fd2[\"SWDOWN\"]\n",
+    "sp_var = fd2[\"PSFC\"]\n",
+    "tmp_var = fd2[\"T2D\"]\n",
+    "u2d_var = fd2[\"U2D\"]\n",
+    "v2d_var = fd2[\"V2D\"]\n",
+    "pcp_var2 = fd2[\"RAINRATE\"]\n",
     "\n",
     "for _i in range(0,40000):\n",
     "    #_i = 0\n",
@@ -991,10 +567,31 @@
     "        d.index.name = \"time\"\n",
     "\n",
     "\n",
-    "        d.to_csv(f\"input_data/cat03w_{_i:07}.csv\")\n",
+    "        d.to_csv(f\"input_data_240/cat03w_{_i:07}.csv\")\n",
     "    except:\n",
     "        print(f\"no data for watershed {_i}\", end=\"\\t\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "358d9a99-2016-4c51-b65c-e9cd92fce358",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Make a shell script string to rename the csvs...\n",
+    "gpkg_divides[\"id\"]\n",
+    "for _i, cat_id in enumerate(gpkg_divides[\"id\"]):\n",
+    "    print(f\"mv cat03w_{_i:07}.csv cat03w_{cat_id}.csv\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52383127-cc94-49cc-a0ba-3d3e2057fc08",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {