From 7f2b46f916881feee6b92b0bd1ffc06bd064fd39 Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Mon, 22 May 2023 10:56:11 -0500 Subject: [PATCH 01/17] validate_url is set to false and timeout is set to 4 seconds which makes the output to display much faster --- nwm_filenames/listofnwmfilenamesretro.py | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index ab517ec..c58280f 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -53,7 +53,6 @@ def selectrun(rundict, runinput): import requests - def generate_url(date, file_type, urlbase_prefix, data_type): year_txt = f"{date.strftime('%Y')}" date_txt = f"{date.strftime('%Y%m%d%H')}" @@ -65,11 +64,16 @@ def generate_url(date, file_type, urlbase_prefix, data_type): url = f"{urlbase_prefix}{year_txt}/{date_txt}00{retrospective_var_types[data_type - 1]}" # Check if the link exists - response = requests.head(url) - if response.status_code == 200: - return url + validate_url = False + timeout = 4 + if validate_url: + response = requests.head(url, timeout=timeout) + if response.status_code == 200: + return url + else: + return None else: - return None + return url def makename( @@ -85,7 +89,6 @@ def makename( varsuffix="", run_typesuffix="", urlbase_prefix="", - validate_url=True, # Optional argument for URL validation ): datetxt = f"nwm.{date.strftime('%Y%m%d')}" foldertxt = f"{run_type}{run_typesuffix}" @@ -93,8 +96,10 @@ def makename( url = f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + validate_url = False + timeout = 4 if validate_url: - response = requests.head(url) + response = requests.head(url, timeout=timeout) if response.status_code == 200: return url else: @@ -102,7 +107,6 @@ def makename( else: return url - # setting run_type def run_type(runinput, varinput, geoinput, default=""): if varinput == 5: # if forcing @@ -256,18 +260,7 @@ def create_archive_file_list(urlbaseinput): pass -def operational_archive_file_name_creator( - dates, - runinput, - varinput, - geoinput, - run_name, - meminput, - urlbaseinput, - fcst_cycle, - lead_time, - r, -): +def operational_archive_file_name_creator(dates, runinput, varinput, geoinput, run_name, meminput, urlbaseinput, fcst_cycle, lead_time, r): runsuff = "" try: geography = selectgeo(geodict, geoinput) @@ -668,4 +661,4 @@ def main(): if len(sys.argv) > 1 and sys.argv[1] == "--test": test_create_file_list() else: - main() + main() \ No newline at end of file From b5f9de1342df4679d1f9fa98f5a3f81147af65b7 Mon Sep 17 00:00:00 2001 From: James Halgren Date: Fri, 26 May 2023 16:57:44 -0500 Subject: [PATCH 02/17] black --- nwm_filenames/listofnwmfilenamesretro.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index c58280f..7cfaf47 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -53,6 +53,7 @@ def selectrun(rundict, runinput): import requests + def generate_url(date, file_type, urlbase_prefix, data_type): year_txt = f"{date.strftime('%Y')}" date_txt = f"{date.strftime('%Y%m%d%H')}" @@ -107,6 +108,7 @@ def makename( else: return url + # setting run_type def run_type(runinput, varinput, geoinput, default=""): if varinput == 5: # if forcing @@ -260,7 +262,18 @@ def create_archive_file_list(urlbaseinput): pass -def operational_archive_file_name_creator(dates, runinput, varinput, geoinput, run_name, meminput, urlbaseinput, fcst_cycle, lead_time, r): +def operational_archive_file_name_creator( + dates, + runinput, + varinput, + geoinput, + run_name, + meminput, + urlbaseinput, + fcst_cycle, + lead_time, + r, +): runsuff = "" try: geography = selectgeo(geodict, geoinput) @@ -661,4 +674,4 @@ def main(): if len(sys.argv) > 1 and sys.argv[1] == "--test": test_create_file_list() else: - main() \ No newline at end of file + main() From d5c408dd5652c4d7a9409f27c1403da88d5ed5d5 Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Tue, 30 May 2023 10:05:11 -0500 Subject: [PATCH 03/17] The retrofiles script has been cleaned and it has functions, variables and url's with respective to the files needed --- nwm_filenames/listofnwmfilenamesretro.py | 663 ++--------------------- 1 file changed, 41 insertions(+), 622 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index 7cfaf47..04967a3 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -1,33 +1,10 @@ -import sys - +import requests from dateutil import rrule from datetime import datetime from itertools import product +import multiprocessing +from multiprocessing.pool import Pool -rundict = { - 1: "short_range", - 2: "medium_range", - 3: "medium_range_no_da", - 4: "long_range", - 5: "analysis_assim", - 6: "analysis_assim_extend", - 7: "analysis_assim_extend_no_da", - 8: "analysis_assim_long", - 9: "analysis_assim_long_no_da", - 10: "analysis_assim_no_da", - 11: "short_range_no_da", -} -memdict = { - 1: "mem_1", - 2: "mem_2", - 3: "mem_3", - 4: "mem_4", - 5: "mem_5", - 6: "mem_6", - 7: "mem_7", -} -vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} -geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} retrospective_var_types = [ ".CHRTOUT_DOMAIN1.comp", ".GWOUT_DOMAIN1.comp", @@ -38,22 +15,6 @@ ] objecttype = ["forcing/", "model_output/"] - -def selectvar(vardict, varinput): - return vardict[varinput] - - -def selectgeo(geodict, geoinput): - return geodict[geoinput] - - -def selectrun(rundict, runinput): - return rundict[runinput] - - -import requests - - def generate_url(date, file_type, urlbase_prefix, data_type): year_txt = f"{date.strftime('%Y')}" date_txt = f"{date.strftime('%Y%m%d%H')}" @@ -63,615 +24,73 @@ def generate_url(date, file_type, urlbase_prefix, data_type): url = f"{urlbase_prefix}{year_txt}/{date_txt}00.LDASIN_DOMAIN1" else: url = f"{urlbase_prefix}{year_txt}/{date_txt}00{retrospective_var_types[data_type - 1]}" - - # Check if the link exists - validate_url = False - timeout = 4 - if validate_url: - response = requests.head(url, timeout=timeout) - if response.status_code == 200: - return url - else: - return None - else: - return url - - -def makename( - date, - run_name, - var_name, - fcst_cycle, - fcst_hour, - geography, - run_type, - fhprefix="", - runsuffix="", - varsuffix="", - run_typesuffix="", - urlbase_prefix="", -): - datetxt = f"nwm.{date.strftime('%Y%m%d')}" - foldertxt = f"{run_type}{run_typesuffix}" - filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" - - url = f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" - - validate_url = False - timeout = 4 - if validate_url: - response = requests.head(url, timeout=timeout) - if response.status_code == 200: - return url - else: - return None - else: - return url - - -# setting run_type -def run_type(runinput, varinput, geoinput, default=""): - if varinput == 5: # if forcing - if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii - return "forcing_analysis_assim_hawaii" - elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico - return "forcing_analysis_assim_puertorico" - elif runinput == 1 and geoinput == 2: # if short range and hawaii - return "forcing_short_range_hawaii" - elif runinput == 1 and geoinput == 3: # if short range and puerto rico - return "forcing_short_range_puertorico" - elif runinput == 5: # if analysis assim - return "forcing_analysis_assim" - elif runinput == 6: # if analysis_assim_extend - return "forcing_analysis_assim_extend" - elif runinput == 2: # if medium_range - return "forcing_medium_range" - elif runinput == 1: # if short range - return "forcing_short_range" - - elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico - return "analysis_assim_puertorico" - - elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico - return "analysis_assim_puertorico_no_da" - - elif runinput == 1 and geoinput == 3: # if short_range and puerto rico - return "short_range_puertorico" - - elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico - return "short_range_puertorico_no_da" - - else: - return default - - -def fhprefix(runinput): - if 4 <= runinput <= 10: - return "tm" - return "f" - - -def varsuffix(meminput): - if meminput in range(1, 8): - return f"_{meminput}" - else: - return "" - - -def run_typesuffix(meminput): - if meminput in range(1, 8): - return f"_mem{meminput}" - else: - return "" - - -def select_forecast_cycle(fcst_cycle=None, default=None): - if fcst_cycle: - return fcst_cycle - else: - return default - - -def select_lead_time(lead_time=None, default=None): - if lead_time: - return lead_time - else: - return default - - -urlbasedict = { - 0: "", - 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", - 3: "https://storage.googleapis.com/national-water-model/", - 4: "https://storage.cloud.google.com/national-water-model/", - 5: "gs://national-water-model/", - 6: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/", - 7: "s3://noaa-nwm-retrospective-2-1-pds/model_output/", -} - - -def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput: - return urlbasedict[urlbaseinput] - else: - return defaulturlbase - - -""" -Check if the start and end dates provided are valid and in the correct format. - -Parameters: - start_date (str): The start date in format 'YYYYMMDDHHMM' - end_date (str): The end date in format 'YYYYMMDDHHMM' - -Raises: - ValueError: If the start or end date is not in the correct format, or if the time range is invalid. - -Returns: - Tuple: A tuple containing the start date, end date, start time, and end time. -""" + return url def validate_date_range(start_date, end_date): - try: - if len(start_date) != 12 or len(end_date) != 12: - raise ValueError("Start and end dates should be in format 'YYYYMMDDHHMM'.") - - try: - start_date_year = int(start_date[:4]) - start_date_month = int(start_date[4:6]) - start_date_day = int(start_date[6:8]) - datetime(start_date_year, start_date_month, start_date_day) - - end_date_year = int(end_date[:4]) - end_date_month = int(end_date[4:6]) - end_date_day = int(end_date[6:8]) - datetime(end_date_year, end_date_month, end_date_day) - - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - _starttime = datetime.strptime(start_date[8:], "%H%M") - _endtime = datetime.strptime(end_date[8:], "%H%M") - - if ( - _starttime.hour < 0 - or _starttime.hour > 23 - or _endtime.hour < 0 - or _endtime.hour > 23 - ): - raise ValueError( - "Incorrect time range entered. Time range should be between 0000 - 2300 (HHMM)." - ) - - except ValueError: - raise ValueError("Start and end dates should be in format 'YYYYMMDDHHMM'.") - - except ValueError as ve: - raise ve - - return _dtstart, _until, _starttime, _endtime - - -def create_archive_file_list(urlbaseinput): - if urlbaseinput != 6: - # CALL operational archive file name creator - pass - else: - # CALL retrospective archive file name creator - pass - - -def operational_archive_file_name_creator( - dates, - runinput, - varinput, - geoinput, - run_name, - meminput, - urlbaseinput, - fcst_cycle, - lead_time, - r, -): - runsuff = "" - try: - geography = selectgeo(geodict, geoinput) - except: - geography = "geography_error" - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - var_name = selectvar(vardict, varinput) - except: - var_name = "variable_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - run_t = run_type(runinput, varinput, geoinput, run_name) - fhp = fhprefix(runinput) - vsuff = varsuffix(meminput) - rtsuff = run_typesuffix(meminput) - - if runinput == 1: # if short_range - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - elif geoinput == 3: # puertorico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [6]), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif geoinput == 3: # if puerto rico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif runinput == 2: # if medium_range - if varinput == 5: # if forcing - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 19, 6)), - select_lead_time(lead_time, range(1, 241)), - ) - else: - default_fc = range(0, 19, 6) - if meminput == 1: - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 241)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 241, 3)), - ) - else: - raise ValueError("varinput") - elif meminput in range(2, 8): - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 205)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 205, 3)), - ) - else: - raise ValueError("varinput") - else: - raise ValueError("meminput") - elif runinput == 3: # if medium_range_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 6)), - select_lead_time(lead_time, range(3, 240, 3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 4: # if long_range - default_fc = range(0, 19, 6) - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(6, 721, 6)), - ) - elif varinput == 2: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(24, 721, 24)), - ) - else: - raise ValueError("varinput") - elif runinput == 5: # if analysis_assim (simplest form) - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(19)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(20)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(3)), - ) - elif runinput == 6: # if analysis_assim_extend - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - elif runinput == 7: # if analysis_assim_extend_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 8: # if analysis_assim_long - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - elif runinput == 9: # if analysis_assim_long_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 10: # if analysis_assim_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(21)), - select_lead_time(lead_time, range(3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - else: - raise ValueError("run error") - for _dt, _fc, _fh in prod: - file_name = makename( - _dt, - run_name, - var_name, - _fc, - _fh, - geography, - run_t, - fhp, - runsuff, - vsuff, - rtsuff, - urlbase_prefix, - ) - if file_name is not None: - r.append(file_name) - + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + return _dtstart, _until -def retrospective_archive_file_name_creator( - start_date, end_date, objecttype, file_types, urlbase_prefix, r -): - _dtstart, _until, _starttime, _endtime = validate_date_range(start_date, end_date) +def retrospective_archive_file_name_creator(start_date, end_date, objecttype, file_types, urlbase_prefix): + _dtstart, _until = validate_date_range(start_date, end_date) dates = rrule.rrule( rrule.HOURLY, dtstart=_dtstart, until=_until, ) - if isinstance(objecttype, int): - objecttype = [objecttype] - - if not all(x in [1, 2] for x in objecttype): - raise ValueError( - "Invalid object type. Valid object types are 1, 2, or both [1, 2]." - ) - - if not all(x in [1, 2, 3, 4, 5, 6] for x in file_types): - raise ValueError( - "Invalid file type. Valid file types are any combination of [1, 2, 3, 4, 5, 6]." - ) - + r = [] datetimes = product(dates, range(1)) for _dt, th in datetimes: for tp in file_types: for obj_type in objecttype: - file_name = generate_url( - _dt, - obj_type, - urlbase_prefix, - tp, - ) + file_name = generate_url(_dt, obj_type, urlbase_prefix, tp) if file_name is not None: r.append(file_name) - -def create_file_list_retro( - runinput, - varinput, - geoinput, - meminput, - start_date=None, - end_date=None, - fcst_cycle=None, - urlbaseinput=None, - lead_time=None, - file_types=[1], - objecttype=None, -): - # for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - valid_types = [1, 2, 3, 4, 5, 6] - if not all(x in valid_types for x in file_types): - raise ValueError( - "Invalid type input. Type can be any combination of [1, 2, 3, 4, 5, 6]." - ) - - r = [] - if urlbaseinput != 6: - _dtstart, _until, _starttime, _endtime = validate_date_range( - start_date, end_date - ) - - dates = rrule.rrule( - rrule.HOURLY, - dtstart=_dtstart, - until=_until, - ) - - operational_archive_file_name_creator( - dates, - runinput, - varinput, - geoinput, - run_name, - meminput, - urlbaseinput, - fcst_cycle, - lead_time, - r, - ) - - elif urlbaseinput == 6: - retrospective_archive_file_name_creator( - start_date, end_date, objecttype, file_types, urlbase_prefix, r - ) - return r, len(r) +urlbasedict = { + 6: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/", + 7: "s3://noaa-nwm-retrospective-2-1-pds/model_output/", +} -def test_create_file_list(): - # Test - result, length = create_file_list_retro( - 2, 1, 1, 1, "197902010000", "197902020800", [12, 18], 6, [1, 2, 240], [5, 6], 1 - ) - assert isinstance(result, list) - assert isinstance(length, int) - assert len(result) == length - expected_urls = [ - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010000.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010100.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010200.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010300.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010400.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010500.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010600.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010700.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010800.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902010900.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011000.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011100.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011200.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011300.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011400.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011500.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011600.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011700.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011800.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902011900.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902012000.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902012100.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902012200.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902012300.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020000.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020100.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020200.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020300.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020400.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020500.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020600.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020700.LDASIN_DOMAIN1", - "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/forcing/1979/197902020800.LDASIN_DOMAIN1", - ] +def create_file_list_retro(start_date=None, end_date=None, fcst_cycle=None, urlbaseinput=None, file_types=[1], objecttype=None): + urlbase_prefix = urlbasedict[urlbaseinput] - if result == expected_urls: - print("Test passed!") + if urlbaseinput == 6: + return retrospective_archive_file_name_creator(start_date, end_date, objecttype, file_types, urlbase_prefix) +def check_url(file): + try: + response = requests.head(file, timeout=1) + if response.status_code == 200: + return file + except requests.exceptions.RequestException: + pass + +def check_valid_urls(file_list): + with Pool(multiprocessing.cpu_count()) as p: + valid_file_list = p.map(check_url, file_list) + return [file for file in valid_file_list if file is not None] def main(): start_date = "19790201" end_date = "19790202" fcst_cycle = [12, 18] - lead_time = [1, 2, 240] - # fcst_cycle = None # Retrieves a full day for each day within the range given. - runinput = 2 - varinput = 1 - geoinput = 1 - meminput = 1 urlbaseinput = 6 + file_types = [5, 6] + objecttype = [1] start_time = "0000" end_time = "0800" - type_input = [5, 6] - object_type = 1 - try: - file_list, length = create_file_list_retro( - runinput, - varinput, - geoinput, - meminput, - start_date + start_time, - end_date + end_time, - fcst_cycle, - urlbaseinput, - lead_time, - type_input, - object_type, - ) - if length == 0: - print(f"No files found") - else: - print(f"Files: {file_list}\nTotal files: {length}") - except ValueError as ve: - print(ve) - + file_list, length = create_file_list_retro(start_date + start_time, end_date + end_time, fcst_cycle, urlbaseinput, file_types, objecttype) + if length == 0: + print(f"No files found") + else: + print(f"Files: {file_list}\nTotal files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") if __name__ == "__main__": - if len(sys.argv) > 1 and sys.argv[1] == "--test": - test_create_file_list() - else: - main() + main() \ No newline at end of file From c881048a0fd9fdc59523b9aecffeb6ea34e7602b Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Wed, 7 Jun 2023 15:37:20 -0500 Subject: [PATCH 04/17] Modified the script to make it more efficient --- nwm_filenames/listofnwmfilenamesretro.py | 124 +++++++++++------------ 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index 04967a3..b8ac4a8 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -1,66 +1,62 @@ import requests -from dateutil import rrule -from datetime import datetime -from itertools import product -import multiprocessing -from multiprocessing.pool import Pool - -retrospective_var_types = [ - ".CHRTOUT_DOMAIN1.comp", - ".GWOUT_DOMAIN1.comp", - ".LAKEOUT_DOMAIN1.comp", - ".LDASOUT_DOMAIN1.comp", - ".RTOUT_DOMAIN1.comp", - ".LDASIN_DOMAIN1.comp", -] -objecttype = ["forcing/", "model_output/"] - -def generate_url(date, file_type, urlbase_prefix, data_type): - year_txt = f"{date.strftime('%Y')}" - date_txt = f"{date.strftime('%Y%m%d%H')}" - urlbase_prefix = urlbase_prefix + objecttype[file_type - 1] - - if data_type == 6: - url = f"{urlbase_prefix}{year_txt}/{date_txt}00.LDASIN_DOMAIN1" - else: - url = f"{urlbase_prefix}{year_txt}/{date_txt}00{retrospective_var_types[data_type - 1]}" - return url - - -def validate_date_range(start_date, end_date): - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - return _dtstart, _until +from datetime import datetime, timedelta +from concurrent.futures import ThreadPoolExecutor -def retrospective_archive_file_name_creator(start_date, end_date, objecttype, file_types, urlbase_prefix): - _dtstart, _until = validate_date_range(start_date, end_date) - dates = rrule.rrule( - rrule.HOURLY, - dtstart=_dtstart, - until=_until, - ) - - r = [] - datetimes = product(dates, range(1)) - for _dt, th in datetimes: - for tp in file_types: - for obj_type in objecttype: - file_name = generate_url(_dt, obj_type, urlbase_prefix, tp) - if file_name is not None: - r.append(file_name) +retrospective_var_types = { + 1: ".CHRTOUT_DOMAIN1.comp", + 2: ".GWOUT_DOMAIN1.comp", + 3: ".LAKEOUT_DOMAIN1.comp", + 4: ".LDASOUT_DOMAIN1.comp", + 5: ".RTOUT_DOMAIN1.comp", + 6: ".LDASIN_DOMAIN1.comp", +} - return r, len(r) +objecttypes = { + 1: "forcing/", + 2: "model_output/" +} urlbasedict = { 6: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/", 7: "s3://noaa-nwm-retrospective-2-1-pds/model_output/", } -def create_file_list_retro(start_date=None, end_date=None, fcst_cycle=None, urlbaseinput=None, file_types=[1], objecttype=None): +def generate_url(date, file_type, urlbase_prefix, retrospective_var_types=None): + year_txt = date.strftime('%Y') + date_txt = date.strftime('%Y%m%d%H') + + if file_type == "forcing/": + if 1979 <= date.year <= 2006: + date_txt += date.strftime('%M') + url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}.LDASIN_DOMAIN1" + elif file_type == "model_output/": + url = [f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00{type}" for type in retrospective_var_types] + + return url + + +def create_file_list_retro(start_date=None, end_date=None, urlbaseinput=None, objecttype=objecttypes, selected_var_types=None): urlbase_prefix = urlbasedict[urlbaseinput] + objecttype = [objecttypes[i] for i in objecttype] + retrospective_var_types_selected = [retrospective_var_types[i] for i in selected_var_types] + + start_dt = datetime.strptime(start_date, "%Y%m%d%H%M") + end_dt = datetime.strptime(end_date, "%Y%m%d%H%M") + + delta = end_dt - start_dt + date_range = [start_dt + timedelta(hours=i) for i in range(delta.days * 24 + delta.seconds // 3600 + 1)] + + file_list = [] + for date in date_range: + for obj_type in objecttype: + file_names = generate_url(date, obj_type, urlbase_prefix, retrospective_var_types_selected) + if file_names is not None: + if isinstance(file_names, list): + file_list.extend(file_names) + else: + file_list.append(file_names) - if urlbaseinput == 6: - return retrospective_archive_file_name_creator(start_date, end_date, objecttype, file_types, urlbase_prefix) + return file_list def check_url(file): try: @@ -71,25 +67,29 @@ def check_url(file): pass def check_valid_urls(file_list): - with Pool(multiprocessing.cpu_count()) as p: - valid_file_list = p.map(check_url, file_list) + with ThreadPoolExecutor() as executor: + valid_file_list = list(executor.map(check_url, file_list)) + return [file for file in valid_file_list if file is not None] def main(): - start_date = "19790201" - end_date = "19790202" - fcst_cycle = [12, 18] + start_date = "20150201" + end_date = "20150202" urlbaseinput = 6 - file_types = [5, 6] - objecttype = [1] + selected_var_types = [1,2] + selected_object_types = [2] # To test both forcing and model_output start_time = "0000" end_time = "0800" - file_list, length = create_file_list_retro(start_date + start_time, end_date + end_time, fcst_cycle, urlbaseinput, file_types, objecttype) - if length == 0: + + file_list = create_file_list_retro(start_date + start_time, end_date + end_time, urlbaseinput, selected_object_types, selected_var_types) + + if len(file_list) == 0: print(f"No files found") else: - print(f"Files: {file_list}\nTotal files: {len(file_list)}") + print(f"Files: {file_list}\nTotal Files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") if __name__ == "__main__": From 0bea2dc58b9be1aa00b9ab12f9281fe5a29d7b2f Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Wed, 7 Jun 2023 15:47:25 -0500 Subject: [PATCH 05/17] Modified the script to run more efficiently --- nwm_filenames/listofnwmfilenamesretro.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index b8ac4a8..fc28299 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -25,11 +25,12 @@ def generate_url(date, file_type, urlbase_prefix, retrospective_var_types=None): year_txt = date.strftime('%Y') date_txt = date.strftime('%Y%m%d%H') - if file_type == "forcing/": - if 1979 <= date.year <= 2006: - date_txt += date.strftime('%M') + if 1979 <= date.year <= 2006: + date_txt += date.strftime('%M') + + if "forcing" in file_type: url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}.LDASIN_DOMAIN1" - elif file_type == "model_output/": + elif "model_output" in file_type: url = [f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00{type}" for type in retrospective_var_types] return url @@ -60,24 +61,24 @@ def create_file_list_retro(start_date=None, end_date=None, urlbaseinput=None, ob def check_url(file): try: - response = requests.head(file, timeout=1) + response = session.head(file, timeout=1) if response.status_code == 200: return file except requests.exceptions.RequestException: pass def check_valid_urls(file_list): - with ThreadPoolExecutor() as executor: + with ThreadPoolExecutor(max_workers=10) as executor: valid_file_list = list(executor.map(check_url, file_list)) return [file for file in valid_file_list if file is not None] def main(): - start_date = "20150201" - end_date = "20150202" + start_date = "20170201" + end_date = "20170202" urlbaseinput = 6 - selected_var_types = [1,2] - selected_object_types = [2] # To test both forcing and model_output + selected_var_types = [1,3] + selected_object_types = [1] # To test both forcing and model_output start_time = "0000" end_time = "0800" @@ -87,10 +88,11 @@ def main(): print(f"No files found") else: print(f"Files: {file_list}\nTotal Files: {len(file_list)}") - + valid_file_list = check_valid_urls(file_list) print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") if __name__ == "__main__": + session = requests.Session() main() \ No newline at end of file From 4dd015d9f7614719c3cb8689f0d6b58a40eb4552 Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Wed, 7 Jun 2023 20:13:15 -0500 Subject: [PATCH 06/17] Added URL check and updated URL's input --- nwm_filenames/listofnwmfilenames.py | 49 +++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/nwm_filenames/listofnwmfilenames.py b/nwm_filenames/listofnwmfilenames.py index 1828985..41cb60c 100644 --- a/nwm_filenames/listofnwmfilenames.py +++ b/nwm_filenames/listofnwmfilenames.py @@ -1,6 +1,11 @@ +import multiprocessing + +import requests from dateutil import rrule from datetime import datetime, timezone from itertools import product +import os +os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/your/service-account-file.json' rundict = { 1: "short_range", @@ -141,8 +146,8 @@ def select_lead_time(lead_time=None, default=None): 3: "https://storage.googleapis.com/national-water-model/", 4: "https://storage.cloud.google.com/national-water-model/", 5: "gs://national-water-model/", - 6: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/model_output/", - 7: "s3://noaa-nwm-retrospective-2-1-pds/model_output/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", } @@ -390,28 +395,42 @@ def create_file_list( ) return r +from google.cloud import storage +def check_url(file): + try: + with requests.get(file, stream=True, timeout=1) as response: + response.raise_for_status() + return file + except requests.exceptions.RequestException: + pass + + +def check_valid_urls(file_list): + with multiprocessing.Pool(multiprocessing.cpu_count()) as p: + valid_file_list = p.map(check_url, file_list) + return [file for file in valid_file_list if file is not None] + def main(): - start_date = "20220822" - end_date = "20220824" + start_date = "20030402" + end_date = "20030420" - fcst_cycle = [12, 18] - lead_time = [1, 2, 240] + fcst_cycle = [5, 12] + lead_time = [158] # fcst_cycle = None # Retrieves a full day for each day within the range given. runinput = 2 - varinput = 1 + varinput = 3 geoinput = 1 - meminput = 1 + meminput = 5 - urlbaseinput = None + urlbaseinput = 4 - print( - create_file_list( + file_list = create_file_list( runinput, varinput, geoinput, @@ -421,9 +440,13 @@ def main(): fcst_cycle, urlbaseinput, lead_time, - ) ) - + if len(file_list) == 0: + print(f"No files found") + else: + print(f"Files: {file_list}\nTotal files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") if __name__ == "__main__": main() From d2dc329c19ac3e23acc02e40d7c937853f62868c Mon Sep 17 00:00:00 2001 From: Siva Rama Rohan Sunkarapalli Date: Thu, 8 Jun 2023 10:08:09 -0500 Subject: [PATCH 07/17] Corrected year condition for forcing and model_output to generate URL's --- nwm_filenames/listofnwmfilenamesretro.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index fc28299..fe10a5d 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -25,10 +25,9 @@ def generate_url(date, file_type, urlbase_prefix, retrospective_var_types=None): year_txt = date.strftime('%Y') date_txt = date.strftime('%Y%m%d%H') - if 1979 <= date.year <= 2006: - date_txt += date.strftime('%M') - - if "forcing" in file_type: + if "forcing" in file_type and date.year < 2007: + url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00.LDASIN_DOMAIN1" + elif "forcing" in file_type and date.year >= 2007: url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}.LDASIN_DOMAIN1" elif "model_output" in file_type: url = [f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00{type}" for type in retrospective_var_types] @@ -74,10 +73,10 @@ def check_valid_urls(file_list): return [file for file in valid_file_list if file is not None] def main(): - start_date = "20170201" - end_date = "20170202" + start_date = "20070101" + end_date = "20070102" urlbaseinput = 6 - selected_var_types = [1,3] + selected_var_types = [1,2] selected_object_types = [1] # To test both forcing and model_output start_time = "0000" end_time = "0800" From 37d81b2d8dc66abd105b75716c3b288bac45c32f Mon Sep 17 00:00:00 2001 From: James Halgren Date: Tue, 13 Jun 2023 17:41:59 -0500 Subject: [PATCH 08/17] format with black --- nwm_filenames/listofnwmfilenames.py | 33 ++++++++----- nwm_filenames/listofnwmfilenamesretro.py | 62 ++++++++++++++++-------- 2 files changed, 64 insertions(+), 31 deletions(-) diff --git a/nwm_filenames/listofnwmfilenames.py b/nwm_filenames/listofnwmfilenames.py index 41cb60c..940347f 100644 --- a/nwm_filenames/listofnwmfilenames.py +++ b/nwm_filenames/listofnwmfilenames.py @@ -5,7 +5,8 @@ from datetime import datetime, timezone from itertools import product import os -os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/your/service-account-file.json' + +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service-account-file.json" rundict = { 1: "short_range", @@ -169,7 +170,7 @@ def create_file_list( urlbaseinput=None, lead_time=None, # TODO: change this order; placed here to avoid breaking change ): - """ for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" runsuff = "" @@ -198,7 +199,11 @@ def create_file_list( _dtstart = today _until = today - dates = rrule.rrule(rrule.DAILY, dtstart=_dtstart, until=_until,) + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) run_t = run_type(runinput, varinput, geoinput, run_name) fhp = fhprefix(runinput) vsuff = varsuffix(meminput) @@ -395,7 +400,10 @@ def create_file_list( ) return r + from google.cloud import storage + + def check_url(file): try: with requests.get(file, stream=True, timeout=1) as response: @@ -431,15 +439,15 @@ def main(): urlbaseinput = 4 file_list = create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date, - end_date, - fcst_cycle, - urlbaseinput, - lead_time, + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + lead_time, ) if len(file_list) == 0: print(f"No files found") @@ -448,5 +456,6 @@ def main(): valid_file_list = check_valid_urls(file_list) print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + if __name__ == "__main__": main() diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index fe10a5d..b518183 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -11,45 +11,59 @@ 6: ".LDASIN_DOMAIN1.comp", } -objecttypes = { - 1: "forcing/", - 2: "model_output/" -} +objecttypes = {1: "forcing/", 2: "model_output/"} urlbasedict = { 6: "https://noaa-nwm-retrospective-2-1-pds.s3.amazonaws.com/", 7: "s3://noaa-nwm-retrospective-2-1-pds/model_output/", } + def generate_url(date, file_type, urlbase_prefix, retrospective_var_types=None): - year_txt = date.strftime('%Y') - date_txt = date.strftime('%Y%m%d%H') + year_txt = date.strftime("%Y") + date_txt = date.strftime("%Y%m%d%H") if "forcing" in file_type and date.year < 2007: url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00.LDASIN_DOMAIN1" elif "forcing" in file_type and date.year >= 2007: url = f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}.LDASIN_DOMAIN1" elif "model_output" in file_type: - url = [f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00{type}" for type in retrospective_var_types] + url = [ + f"{urlbase_prefix}{file_type}{year_txt}/{date_txt}00{type}" + for type in retrospective_var_types + ] return url -def create_file_list_retro(start_date=None, end_date=None, urlbaseinput=None, objecttype=objecttypes, selected_var_types=None): +def create_file_list_retro( + start_date=None, + end_date=None, + urlbaseinput=None, + objecttype=objecttypes, + selected_var_types=None, +): urlbase_prefix = urlbasedict[urlbaseinput] objecttype = [objecttypes[i] for i in objecttype] - retrospective_var_types_selected = [retrospective_var_types[i] for i in selected_var_types] + retrospective_var_types_selected = [ + retrospective_var_types[i] for i in selected_var_types + ] start_dt = datetime.strptime(start_date, "%Y%m%d%H%M") end_dt = datetime.strptime(end_date, "%Y%m%d%H%M") delta = end_dt - start_dt - date_range = [start_dt + timedelta(hours=i) for i in range(delta.days * 24 + delta.seconds // 3600 + 1)] + date_range = [ + start_dt + timedelta(hours=i) + for i in range(delta.days * 24 + delta.seconds // 3600 + 1) + ] file_list = [] for date in date_range: for obj_type in objecttype: - file_names = generate_url(date, obj_type, urlbase_prefix, retrospective_var_types_selected) + file_names = generate_url( + date, obj_type, urlbase_prefix, retrospective_var_types_selected + ) if file_names is not None: if isinstance(file_names, list): file_list.extend(file_names) @@ -58,6 +72,7 @@ def create_file_list_retro(start_date=None, end_date=None, urlbaseinput=None, ob return file_list + def check_url(file): try: response = session.head(file, timeout=1) @@ -66,32 +81,41 @@ def check_url(file): except requests.exceptions.RequestException: pass + def check_valid_urls(file_list): with ThreadPoolExecutor(max_workers=10) as executor: valid_file_list = list(executor.map(check_url, file_list)) - + return [file for file in valid_file_list if file is not None] + def main(): start_date = "20070101" end_date = "20070102" urlbaseinput = 6 - selected_var_types = [1,2] - selected_object_types = [1] # To test both forcing and model_output + selected_var_types = [1, 2] + selected_object_types = [1] # To test both forcing and model_output start_time = "0000" end_time = "0800" - - file_list = create_file_list_retro(start_date + start_time, end_date + end_time, urlbaseinput, selected_object_types, selected_var_types) - + + file_list = create_file_list_retro( + start_date + start_time, + end_date + end_time, + urlbaseinput, + selected_object_types, + selected_var_types, + ) + if len(file_list) == 0: print(f"No files found") else: print(f"Files: {file_list}\nTotal Files: {len(file_list)}") valid_file_list = check_valid_urls(file_list) - + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + if __name__ == "__main__": session = requests.Session() - main() \ No newline at end of file + main() From 9c08b9de19fea863c3cbb7f168eb7b230ad7d4fb Mon Sep 17 00:00:00 2001 From: James Halgren Date: Tue, 13 Jun 2023 18:39:29 -0500 Subject: [PATCH 09/17] use helper file/function for multi-thread file check --- nwm_filenames/filename_helpers.py | 23 +++++++++++++++++++++++ nwm_filenames/listofnwmfilenames.py | 24 +----------------------- nwm_filenames/listofnwmfilenamesretro.py | 21 ++------------------- 3 files changed, 26 insertions(+), 42 deletions(-) create mode 100644 nwm_filenames/filename_helpers.py diff --git a/nwm_filenames/filename_helpers.py b/nwm_filenames/filename_helpers.py new file mode 100644 index 0000000..2660a80 --- /dev/null +++ b/nwm_filenames/filename_helpers.py @@ -0,0 +1,23 @@ +from concurrent.futures import ThreadPoolExecutor +import requests +from functools import partial + + +def check_valid_urls(file_list, session=None): + if not session: + session = requests.Session() + check_url_part = partial(check_url, session) + with ThreadPoolExecutor(max_workers=10) as executor: + valid_file_list = list(executor.map(check_url_part, file_list)) + + return [file for file in valid_file_list if file is not None] + + +def check_url(session, file): + try: + with requests.get(file, stream=True, timeout=1) as response: + response.raise_for_status() + return file + response = session.head(file, timeout=1) + except requests.exceptions.RequestException: + pass diff --git a/nwm_filenames/listofnwmfilenames.py b/nwm_filenames/listofnwmfilenames.py index 940347f..a5563ea 100644 --- a/nwm_filenames/listofnwmfilenames.py +++ b/nwm_filenames/listofnwmfilenames.py @@ -1,12 +1,8 @@ -import multiprocessing - -import requests from dateutil import rrule from datetime import datetime, timezone from itertools import product -import os -os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service-account-file.json" +from filename_helpers import check_valid_urls rundict = { 1: "short_range", @@ -401,24 +397,6 @@ def create_file_list( return r -from google.cloud import storage - - -def check_url(file): - try: - with requests.get(file, stream=True, timeout=1) as response: - response.raise_for_status() - return file - except requests.exceptions.RequestException: - pass - - -def check_valid_urls(file_list): - with multiprocessing.Pool(multiprocessing.cpu_count()) as p: - valid_file_list = p.map(check_url, file_list) - return [file for file in valid_file_list if file is not None] - - def main(): start_date = "20030402" diff --git a/nwm_filenames/listofnwmfilenamesretro.py b/nwm_filenames/listofnwmfilenamesretro.py index b518183..5e4d5e4 100644 --- a/nwm_filenames/listofnwmfilenamesretro.py +++ b/nwm_filenames/listofnwmfilenamesretro.py @@ -1,6 +1,6 @@ -import requests from datetime import datetime, timedelta -from concurrent.futures import ThreadPoolExecutor + +from filename_helpers import check_valid_urls retrospective_var_types = { 1: ".CHRTOUT_DOMAIN1.comp", @@ -73,22 +73,6 @@ def create_file_list_retro( return file_list -def check_url(file): - try: - response = session.head(file, timeout=1) - if response.status_code == 200: - return file - except requests.exceptions.RequestException: - pass - - -def check_valid_urls(file_list): - with ThreadPoolExecutor(max_workers=10) as executor: - valid_file_list = list(executor.map(check_url, file_list)) - - return [file for file in valid_file_list if file is not None] - - def main(): start_date = "20070101" end_date = "20070102" @@ -117,5 +101,4 @@ def main(): if __name__ == "__main__": - session = requests.Session() main() From ebb04b63f6b7509ba4e3ef370c5a9750f678090e Mon Sep 17 00:00:00 2001 From: James Halgren Date: Fri, 16 Jun 2023 16:30:27 -0500 Subject: [PATCH 10/17] change test to use public Google URL --- nwm_filenames/listofnwmfilenames.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nwm_filenames/listofnwmfilenames.py b/nwm_filenames/listofnwmfilenames.py index a5563ea..9072164 100644 --- a/nwm_filenames/listofnwmfilenames.py +++ b/nwm_filenames/listofnwmfilenames.py @@ -414,7 +414,7 @@ def main(): meminput = 5 - urlbaseinput = 4 + urlbaseinput = 3 file_list = create_file_list( runinput, From 099ec78b77a2d40744547e05c992f6601f2d30d9 Mon Sep 17 00:00:00 2001 From: Rohan S Date: Thu, 12 Oct 2023 07:44:18 +0000 Subject: [PATCH 11/17] Python script to upload operational data into aws bucket --- .../listofnwmfilenames-checkpoint.py | 441 ++++++++++++++++ .../operational_aws/filename_helpers.py | 38 ++ .../operational_aws/listofnwmfilenames.py | 441 ++++++++++++++++ nwm_filenames/operational_aws/upload.py | 52 ++ .../listofnwmfilenames-checkpoint.py | 493 ++++++++++++++++++ .../operational_aws_api/filename_helpers.py | 38 ++ .../operational_aws_api/listofnwmfilenames.py | 493 ++++++++++++++++++ nwm_filenames/operational_aws_api/upload.py | 52 ++ 8 files changed, 2048 insertions(+) create mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py create mode 100644 nwm_filenames/operational_aws/filename_helpers.py create mode 100644 nwm_filenames/operational_aws/listofnwmfilenames.py create mode 100644 nwm_filenames/operational_aws/upload.py create mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py create mode 100644 nwm_filenames/operational_aws_api/filename_helpers.py create mode 100644 nwm_filenames/operational_aws_api/listofnwmfilenames.py create mode 100644 nwm_filenames/operational_aws_api/upload.py diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py new file mode 100644 index 0000000..61552f2 --- /dev/null +++ b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py @@ -0,0 +1,441 @@ +from gevent import monkey +monkey.patch_all() +from dateutil import rrule +from datetime import datetime, timezone +from itertools import product +from filename_helpers import check_valid_urls +import time + +rundict = { + 1: "short_range", + 2: "medium_range", + 3: "medium_range_no_da", + 4: "long_range", + 5: "analysis_assim", + 6: "analysis_assim_extend", + 7: "analysis_assim_extend_no_da", + 8: "analysis_assim_long", + 9: "analysis_assim_long_no_da", + 10: "analysis_assim_no_da", + 11: "short_range_no_da", +} +memdict = { + 1: "mem_1", + 2: "mem_2", + 3: "mem_3", + 4: "mem_4", + 5: "mem_5", + 6: "mem_6", + 7: "mem_7", +} +vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} +geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} + + +def selectvar(vardict, varinput): + return vardict[varinput] + + +def selectgeo(geodict, geoinput): + return geodict[geoinput] + + +def selectrun(rundict, runinput): + return rundict[runinput] + + +def makename( + date, + run_name, + var_name, + fcst_cycle, + fcst_hour, + geography, + run_type, + fhprefix="", + runsuffix="", + varsuffix="", + run_typesuffix="", + urlbase_prefix="", +): + """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" + + datetxt = f"nwm.{date.strftime('%Y%m%d')}" + foldertxt = f"{run_type}{run_typesuffix}" + filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" + return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + + +# setting run_type +def run_type(runinput, varinput, geoinput, default=""): + """This function takes the numeric command line input and converts to the text used in the url.""" + + if varinput == 5: # if forcing + if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii + return "forcing_analysis_assim_hawaii" + elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico + return "forcing_analysis_assim_puertorico" + elif runinput == 1 and geoinput == 2: # if short range and hawaii + return "forcing_short_range_hawaii" + elif runinput == 1 and geoinput == 3: # if short range and puerto rico + return "forcing_short_range_puertorico" + elif runinput == 5: # if analysis assim + return "forcing_analysis_assim" + elif runinput == 6: # if analysis_assim_extend + return "forcing_analysis_assim_extend" + elif runinput == 2: # if medium_range + return "forcing_medium_range" + elif runinput == 1: # if short range + return "forcing_short_range" + + elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico + return "analysis_assim_puertorico" + + elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico + return "analysis_assim_puertorico_no_da" + + elif runinput == 1 and geoinput == 3: # if short_range and puerto rico + return "short_range_puertorico" + + elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico + return "short_range_puertorico_no_da" + + else: + return default + + +def fhprefix(runinput): + if 4 <= runinput <= 10: + return "tm" + return "f" + + +def varsuffix(meminput): + if meminput in range(1, 8): + return f"_{meminput}" + else: + return "" + + +def run_typesuffix(meminput): + if meminput in range(1, 8): + return f"_mem{meminput}" + else: + return "" + + +def select_forecast_cycle(fcst_cycle=None, default=None): + if fcst_cycle: + return fcst_cycle + else: + return default + + +def select_lead_time(lead_time=None, default=None): + if lead_time: + return lead_time + else: + return default + + +urlbasedict = { + 0: "", + 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", + 3: "https://storage.googleapis.com/national-water-model/", + 4: "https://storage.cloud.google.com/national-water-model/", + 5: "gs://national-water-model/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", +} + + +def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): + if urlbaseinput: + return urlbasedict[urlbaseinput] + else: + return defaulturlbase + + +def create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date=None, + end_date=None, + fcst_cycle=None, + urlbaseinput=None, + lead_time=None, # TODO: change this order; placed here to avoid breaking change +): + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + + runsuff = "" + + try: + geography = selectgeo(geodict, geoinput) + except: + geography = "geography_error" + try: + run_name = selectrun(rundict, runinput) + except: + run_name = "run_error" + try: + var_name = selectvar(vardict, varinput) + except: + var_name = "variable_error" + try: + urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) + except: + urlbase_prefix = "urlbase_error" + + try: + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + except: + today = datetime.now(timezone.utc) + _dtstart = today + _until = today + + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) + run_t = run_type(runinput, varinput, geoinput, run_name) + fhp = fhprefix(runinput) + vsuff = varsuffix(meminput) + rtsuff = run_typesuffix(meminput) + + if runinput == 1: # if short_range + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + elif geoinput == 3: # puertorico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [6]), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif geoinput == 3: # if puerto rico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif runinput == 2: # if medium_range + if varinput == 5: # if forcing + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 19, 6)), + select_lead_time(lead_time, range(1, 241)), + ) + else: + default_fc = range(0, 19, 6) + if meminput == 1: + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 241)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 241, 3)), + ) + else: + raise ValueError("varinput") + elif meminput in range(2, 8): + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 205)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 205, 3)), + ) + else: + raise ValueError("varinput") + else: + raise ValueError("meminput") + elif runinput == 3: # if medium_range_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 6)), + select_lead_time(lead_time, range(3, 240, 3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 4: # if long_range + default_fc = range(0, 19, 6) + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(6, 721, 6)), + ) + elif varinput == 2: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(24, 721, 24)), + ) + else: + raise ValueError("varinput") + elif runinput == 5: # if analysis_assim (simplest form) + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(19)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(20)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(3)), + ) + elif runinput == 6: # if analysis_assim_extend + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + elif runinput == 7: # if analysis_assim_extend_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 8: # if analysis_assim_long + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + elif runinput == 9: # if analysis_assim_long_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 10: # if analysis_assim_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(21)), + select_lead_time(lead_time, range(3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + else: + raise ValueError("run error") + + r = [] + for _dt, _fc, _fh in prod: + r.append( + makename( + _dt, + run_name, + var_name, + _fc, + _fh, + geography, + run_t, + fhp, + runsuff, + vsuff, + rtsuff, + urlbase_prefix, + ) + ) + return r + + +def main(): + + + start_date = "201809170000" + end_date = "201809172300" + fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + lead_time = [1, 2, 240] + # fcst_cycle = None # Retrieves a full day for each day within the range given. + runinput = 1 + varinput = 1 + geoinput = 1 + meminput = 0 + urlbaseinput = 3 + + file_list = create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + ) + if len(file_list) == 0: + print(f"No files found") + else: + print(f"Files: {file_list}\nTotal files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + + with open("filenamelist.txt", "w") as file: + for item in valid_file_list: + file.write(f"{item}\n") + + +if __name__ == "__main__": + start = time.time() + main() + print(time.time() - start) diff --git a/nwm_filenames/operational_aws/filename_helpers.py b/nwm_filenames/operational_aws/filename_helpers.py new file mode 100644 index 0000000..e04b2c1 --- /dev/null +++ b/nwm_filenames/operational_aws/filename_helpers.py @@ -0,0 +1,38 @@ +#from concurrent.futures import ThreadPoolExecutor +import gevent +import requests +from functools import partial +from tqdm import tqdm + +def check_valid_urls(file_list, session=None): + """if not session: + session = requests.Session()""" + t = tqdm(range(len(file_list))) + check_url_part = partial(check_url, t) + """with ThreadPoolExecutor(max_workers=10) as executor: + valid_file_list = list(executor.map(check_url_part, file_list))""" + valid_file_list = [gevent.spawn(check_url_part, file_name) for file_name in file_list] + gevent.joinall(valid_file_list) + return [file.get() for file in valid_file_list if file.get() is not None] + + +def check_url(t, file): + filename = file.split("/")[-1] + try: + with requests.head(file) as response: + if response.status_code == 200: + t.set_description(f"Found: {filename}") + t.update(1) + t.refresh() + return file + else: + t.set_description(f"Not Found: {filename}") + t.update(1) + t.refresh() + return None + #response = session.head(file, timeout=1) + except requests.exceptions.RequestException: + t.set_description(f"Not Found: {filename}") + t.update(1) + t.refresh() + return None diff --git a/nwm_filenames/operational_aws/listofnwmfilenames.py b/nwm_filenames/operational_aws/listofnwmfilenames.py new file mode 100644 index 0000000..61552f2 --- /dev/null +++ b/nwm_filenames/operational_aws/listofnwmfilenames.py @@ -0,0 +1,441 @@ +from gevent import monkey +monkey.patch_all() +from dateutil import rrule +from datetime import datetime, timezone +from itertools import product +from filename_helpers import check_valid_urls +import time + +rundict = { + 1: "short_range", + 2: "medium_range", + 3: "medium_range_no_da", + 4: "long_range", + 5: "analysis_assim", + 6: "analysis_assim_extend", + 7: "analysis_assim_extend_no_da", + 8: "analysis_assim_long", + 9: "analysis_assim_long_no_da", + 10: "analysis_assim_no_da", + 11: "short_range_no_da", +} +memdict = { + 1: "mem_1", + 2: "mem_2", + 3: "mem_3", + 4: "mem_4", + 5: "mem_5", + 6: "mem_6", + 7: "mem_7", +} +vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} +geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} + + +def selectvar(vardict, varinput): + return vardict[varinput] + + +def selectgeo(geodict, geoinput): + return geodict[geoinput] + + +def selectrun(rundict, runinput): + return rundict[runinput] + + +def makename( + date, + run_name, + var_name, + fcst_cycle, + fcst_hour, + geography, + run_type, + fhprefix="", + runsuffix="", + varsuffix="", + run_typesuffix="", + urlbase_prefix="", +): + """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" + + datetxt = f"nwm.{date.strftime('%Y%m%d')}" + foldertxt = f"{run_type}{run_typesuffix}" + filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" + return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + + +# setting run_type +def run_type(runinput, varinput, geoinput, default=""): + """This function takes the numeric command line input and converts to the text used in the url.""" + + if varinput == 5: # if forcing + if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii + return "forcing_analysis_assim_hawaii" + elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico + return "forcing_analysis_assim_puertorico" + elif runinput == 1 and geoinput == 2: # if short range and hawaii + return "forcing_short_range_hawaii" + elif runinput == 1 and geoinput == 3: # if short range and puerto rico + return "forcing_short_range_puertorico" + elif runinput == 5: # if analysis assim + return "forcing_analysis_assim" + elif runinput == 6: # if analysis_assim_extend + return "forcing_analysis_assim_extend" + elif runinput == 2: # if medium_range + return "forcing_medium_range" + elif runinput == 1: # if short range + return "forcing_short_range" + + elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico + return "analysis_assim_puertorico" + + elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico + return "analysis_assim_puertorico_no_da" + + elif runinput == 1 and geoinput == 3: # if short_range and puerto rico + return "short_range_puertorico" + + elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico + return "short_range_puertorico_no_da" + + else: + return default + + +def fhprefix(runinput): + if 4 <= runinput <= 10: + return "tm" + return "f" + + +def varsuffix(meminput): + if meminput in range(1, 8): + return f"_{meminput}" + else: + return "" + + +def run_typesuffix(meminput): + if meminput in range(1, 8): + return f"_mem{meminput}" + else: + return "" + + +def select_forecast_cycle(fcst_cycle=None, default=None): + if fcst_cycle: + return fcst_cycle + else: + return default + + +def select_lead_time(lead_time=None, default=None): + if lead_time: + return lead_time + else: + return default + + +urlbasedict = { + 0: "", + 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", + 3: "https://storage.googleapis.com/national-water-model/", + 4: "https://storage.cloud.google.com/national-water-model/", + 5: "gs://national-water-model/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", +} + + +def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): + if urlbaseinput: + return urlbasedict[urlbaseinput] + else: + return defaulturlbase + + +def create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date=None, + end_date=None, + fcst_cycle=None, + urlbaseinput=None, + lead_time=None, # TODO: change this order; placed here to avoid breaking change +): + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + + runsuff = "" + + try: + geography = selectgeo(geodict, geoinput) + except: + geography = "geography_error" + try: + run_name = selectrun(rundict, runinput) + except: + run_name = "run_error" + try: + var_name = selectvar(vardict, varinput) + except: + var_name = "variable_error" + try: + urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) + except: + urlbase_prefix = "urlbase_error" + + try: + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + except: + today = datetime.now(timezone.utc) + _dtstart = today + _until = today + + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) + run_t = run_type(runinput, varinput, geoinput, run_name) + fhp = fhprefix(runinput) + vsuff = varsuffix(meminput) + rtsuff = run_typesuffix(meminput) + + if runinput == 1: # if short_range + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + elif geoinput == 3: # puertorico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [6]), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif geoinput == 3: # if puerto rico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif runinput == 2: # if medium_range + if varinput == 5: # if forcing + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 19, 6)), + select_lead_time(lead_time, range(1, 241)), + ) + else: + default_fc = range(0, 19, 6) + if meminput == 1: + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 241)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 241, 3)), + ) + else: + raise ValueError("varinput") + elif meminput in range(2, 8): + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 205)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 205, 3)), + ) + else: + raise ValueError("varinput") + else: + raise ValueError("meminput") + elif runinput == 3: # if medium_range_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 6)), + select_lead_time(lead_time, range(3, 240, 3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 4: # if long_range + default_fc = range(0, 19, 6) + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(6, 721, 6)), + ) + elif varinput == 2: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(24, 721, 24)), + ) + else: + raise ValueError("varinput") + elif runinput == 5: # if analysis_assim (simplest form) + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(19)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(20)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(3)), + ) + elif runinput == 6: # if analysis_assim_extend + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + elif runinput == 7: # if analysis_assim_extend_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 8: # if analysis_assim_long + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + elif runinput == 9: # if analysis_assim_long_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 10: # if analysis_assim_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(21)), + select_lead_time(lead_time, range(3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + else: + raise ValueError("run error") + + r = [] + for _dt, _fc, _fh in prod: + r.append( + makename( + _dt, + run_name, + var_name, + _fc, + _fh, + geography, + run_t, + fhp, + runsuff, + vsuff, + rtsuff, + urlbase_prefix, + ) + ) + return r + + +def main(): + + + start_date = "201809170000" + end_date = "201809172300" + fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + lead_time = [1, 2, 240] + # fcst_cycle = None # Retrieves a full day for each day within the range given. + runinput = 1 + varinput = 1 + geoinput = 1 + meminput = 0 + urlbaseinput = 3 + + file_list = create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + ) + if len(file_list) == 0: + print(f"No files found") + else: + print(f"Files: {file_list}\nTotal files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + + with open("filenamelist.txt", "w") as file: + for item in valid_file_list: + file.write(f"{item}\n") + + +if __name__ == "__main__": + start = time.time() + main() + print(time.time() - start) diff --git a/nwm_filenames/operational_aws/upload.py b/nwm_filenames/operational_aws/upload.py new file mode 100644 index 0000000..62fe09f --- /dev/null +++ b/nwm_filenames/operational_aws/upload.py @@ -0,0 +1,52 @@ +from concurrent.futures import ProcessPoolExecutor +import requests +import boto3 +from kerchunk.hdf import SingleHdf5ToZarr +import fsspec +import json +import os + +AWS_ACCESS_KEY_ID = 'AKIA4P7DSRJWW4TWOXVA' +AWS_SECRET_ACCESS_KEY = 'Gr9dS0Rrq8KmB8937honqzZDT06MXCy/j0H+VS4t' +BUCKET = "ciroh-nwm-zarr-copy" + +def download_and_convert_and_upload(filename): + + fileurl = filename.replace("\n","") + + text_split = fileurl.split("/") + filename = text_split[-1] + justname = filename[:-3] + sub_folder = text_split[-2] + date_folder = text_split[-3] + bucket_name = text_split[-4] + + s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) + + res = requests.get(fileurl, allow_redirects=True) + with open(f"./tmp{justname}.nc", "w+b") as file: + + file.write(res.content) + converted_data = SingleHdf5ToZarr(file, fileurl).translate() + with open(f"./{justname}.json", "w") as file2: + json.dump(converted_data,file2) + with open(f"./{justname}.json", "r") as file2: + s3.upload_file(f"./{justname}.json",BUCKET,f"{bucket_name}/{date_folder}/{sub_folder}/{filename}.json") + + + os.remove(f"./{justname}.json") + os.remove(f"./tmp{justname}.nc") + +def main(): + + with open("./filenamelist.txt") as file: + + lines = file.readlines() + + with ProcessPoolExecutor() as executor: + + executor.map(download_and_convert_and_upload, lines) + + +if __name__ == "__main__": + main() diff --git a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py new file mode 100644 index 0000000..e282a79 --- /dev/null +++ b/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py @@ -0,0 +1,493 @@ +from gevent import monkey +monkey.patch_all() +import gevent +from dateutil import rrule +from datetime import datetime, timezone +from itertools import product +#from filename_helpers import check_valid_urls +import time +import requests + +rundict = { + 1: "short_range", + 2: "medium_range", + 3: "medium_range_no_da", + 4: "long_range", + 5: "analysis_assim", + 6: "analysis_assim_extend", + 7: "analysis_assim_extend_no_da", + 8: "analysis_assim_long", + 9: "analysis_assim_long_no_da", + 10: "analysis_assim_no_da", + 11: "short_range_no_da", +} +memdict = { + 1: "mem_1", + 2: "mem_2", + 3: "mem_3", + 4: "mem_4", + 5: "mem_5", + 6: "mem_6", + 7: "mem_7", +} +vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} +geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} + + +def selectvar(vardict, varinput): + return vardict[varinput] + + +def selectgeo(geodict, geoinput): + return geodict[geoinput] + + +def selectrun(rundict, runinput): + return rundict[runinput] + + +def makename( + date, + run_name, + var_name, + fcst_cycle, + fcst_hour, + geography, + run_type, + fhprefix="", + runsuffix="", + varsuffix="", + run_typesuffix="", + urlbase_prefix="", +): + """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" + + datetxt = f"nwm.{date.strftime('%Y%m%d')}" + foldertxt = f"{run_type}{run_typesuffix}" + filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" + return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + + +# setting run_type +def run_type(runinput, varinput, geoinput, default=""): + """This function takes the numeric command line input and converts to the text used in the url.""" + + if varinput == 5: # if forcing + if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii + return "forcing_analysis_assim_hawaii" + elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico + return "forcing_analysis_assim_puertorico" + elif runinput == 1 and geoinput == 2: # if short range and hawaii + return "forcing_short_range_hawaii" + elif runinput == 1 and geoinput == 3: # if short range and puerto rico + return "forcing_short_range_puertorico" + elif runinput == 5: # if analysis assim + return "forcing_analysis_assim" + elif runinput == 6: # if analysis_assim_extend + return "forcing_analysis_assim_extend" + elif runinput == 2: # if medium_range + return "forcing_medium_range" + elif runinput == 1: # if short range + return "forcing_short_range" + + elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico + return "analysis_assim_puertorico" + + elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico + return "analysis_assim_puertorico_no_da" + + elif runinput == 1 and geoinput == 3: # if short_range and puerto rico + return "short_range_puertorico" + + elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico + return "short_range_puertorico_no_da" + + else: + return default + + +def fhprefix(runinput): + if 4 <= runinput <= 10: + return "tm" + return "f" + + +def varsuffix(meminput): + if meminput in range(1, 8): + return f"_{meminput}" + else: + return "" + + +def run_typesuffix(meminput): + if meminput in range(1, 8): + return f"_mem{meminput}" + else: + return "" + + +def select_forecast_cycle(fcst_cycle=None, default=None): + if fcst_cycle: + return fcst_cycle + else: + return default + + +def select_lead_time(lead_time=None, default=None): + if lead_time: + return lead_time + else: + return default + + +urlbasedict = { + 0: "", + 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", + 3: "https://storage.googleapis.com/national-water-model/", + 4: "https://storage.cloud.google.com/national-water-model/", + 5: "gs://national-water-model/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", +} + + +def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): + if urlbaseinput: + return urlbasedict[urlbaseinput] + else: + return defaulturlbase + + +def create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date=None, + end_date=None, + fcst_cycle=None, + urlbaseinput=None, + lead_time=None, # TODO: change this order; placed here to avoid breaking change +): + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + + runsuff = "" + + try: + geography = selectgeo(geodict, geoinput) + except: + geography = "geography_error" + try: + run_name = selectrun(rundict, runinput) + except: + run_name = "run_error" + try: + var_name = selectvar(vardict, varinput) + except: + var_name = "variable_error" + try: + urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) + except: + urlbase_prefix = "urlbase_error" + + try: + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + except: + today = datetime.now(timezone.utc) + _dtstart = today + _until = today + + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) + run_t = run_type(runinput, varinput, geoinput, run_name) + fhp = fhprefix(runinput) + vsuff = varsuffix(meminput) + rtsuff = run_typesuffix(meminput) + + if runinput == 1: # if short_range + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + elif geoinput == 3: # puertorico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [6]), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif geoinput == 3: # if puerto rico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif runinput == 2: # if medium_range + if varinput == 5: # if forcing + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 19, 6)), + select_lead_time(lead_time, range(1, 241)), + ) + else: + default_fc = range(0, 19, 6) + if meminput == 1: + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 241)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 241, 3)), + ) + else: + raise ValueError("varinput") + elif meminput in range(2, 8): + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 205)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 205, 3)), + ) + else: + raise ValueError("varinput") + else: + raise ValueError("meminput") + elif runinput == 3: # if medium_range_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 6)), + select_lead_time(lead_time, range(3, 240, 3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 4: # if long_range + default_fc = range(0, 19, 6) + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(6, 721, 6)), + ) + elif varinput == 2: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(24, 721, 24)), + ) + else: + raise ValueError("varinput") + elif runinput == 5: # if analysis_assim (simplest form) + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(19)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(20)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(3)), + ) + elif runinput == 6: # if analysis_assim_extend + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + elif runinput == 7: # if analysis_assim_extend_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 8: # if analysis_assim_long + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + elif runinput == 9: # if analysis_assim_long_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 10: # if analysis_assim_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(21)), + select_lead_time(lead_time, range(3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + else: + raise ValueError("run error") + + r = [] + for _dt, _fc, _fh in prod: + r.append( + makename( + _dt, + run_name, + var_name, + _fc, + _fh, + geography, + run_t, + fhp, + runsuff, + vsuff, + rtsuff, + urlbase_prefix, + ) + ) + return r + + +def main(): + + start_date = "201809170000" + end_date = "201809172300" + fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + lead_time = [1, 2, 240] + # fcst_cycle = None # Retrieves a full day for each day within the range given. + runinput = 1 + varinput = 5 + geoinput = 1 + meminput = 0 + urlbaseinput = 3 + + file_list = create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + ) + if len(file_list) == 0: + print(f"No files found") + else: + #print(f"Files: {file_list}\nTotal files: {len(file_list)}") + paths = link_search_algo(file_list) + tasks = [gevent.spawn(check_directory,path) for path in paths] + gevent.joinall(tasks) + all_links = [] + for task in tasks: + l = task.value + for link in l: + if "https://storage.googleapis.com/national-water-model/"+link in file_list: + all_links.append("https://storage.googleapis.com/national-water-model/"+link) + + print(f"{len(all_links)} files found") + #valid_file_list = check_valid_urls(file_list) + #print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + + with open("filenamelist.txt", "w") as file: + for item in all_links: + file.write(f"{item}\n") + print("Success") + +def link_search_algo(file_list): + paths = [] + for file_name in file_list: + text_sp = file_name.split("/") + path = "/".join(text_sp[-3:-1])+"/" + if not path in paths: + paths.append(path) + return paths + +DOWNLOAD_LINK = "https://storage.googleapis.com/download/storage/v1/b/national-water-model/o/" +URL = "https://storage.googleapis.com/storage/v1/b/national-water-model/o?delimiter=/&prefix=" +TOKEN_PREFIX = "&pageToken=" + +def check_directory(path): + try: + json = requests.get(URL+path).json() + except Exception as e: + print(f"Couldn't fetch {URL}{path}") + try: + found_files = [i["name"] for i in json["items"]] + except: + return [] + + if "nextPageToken" in json: + return loop_check(URL+path,json["nextPageToken"]) + found_files + return found_files + +def loop_check(url,token): + try: + json = requests.get(url+TOKEN_PREFIX+token).json() + except Exception as e: + print(f"Couldn't fetch {url}") + + found_files = [i["name"] for i in json["items"]] + + if "nextPageToken" in json: + return loop_check(url,json["nextPageToken"]) + found_files + return found_files + + +if __name__ == "__main__": + start = time.time() + main() + print(time.time() - start) diff --git a/nwm_filenames/operational_aws_api/filename_helpers.py b/nwm_filenames/operational_aws_api/filename_helpers.py new file mode 100644 index 0000000..e04b2c1 --- /dev/null +++ b/nwm_filenames/operational_aws_api/filename_helpers.py @@ -0,0 +1,38 @@ +#from concurrent.futures import ThreadPoolExecutor +import gevent +import requests +from functools import partial +from tqdm import tqdm + +def check_valid_urls(file_list, session=None): + """if not session: + session = requests.Session()""" + t = tqdm(range(len(file_list))) + check_url_part = partial(check_url, t) + """with ThreadPoolExecutor(max_workers=10) as executor: + valid_file_list = list(executor.map(check_url_part, file_list))""" + valid_file_list = [gevent.spawn(check_url_part, file_name) for file_name in file_list] + gevent.joinall(valid_file_list) + return [file.get() for file in valid_file_list if file.get() is not None] + + +def check_url(t, file): + filename = file.split("/")[-1] + try: + with requests.head(file) as response: + if response.status_code == 200: + t.set_description(f"Found: {filename}") + t.update(1) + t.refresh() + return file + else: + t.set_description(f"Not Found: {filename}") + t.update(1) + t.refresh() + return None + #response = session.head(file, timeout=1) + except requests.exceptions.RequestException: + t.set_description(f"Not Found: {filename}") + t.update(1) + t.refresh() + return None diff --git a/nwm_filenames/operational_aws_api/listofnwmfilenames.py b/nwm_filenames/operational_aws_api/listofnwmfilenames.py new file mode 100644 index 0000000..e282a79 --- /dev/null +++ b/nwm_filenames/operational_aws_api/listofnwmfilenames.py @@ -0,0 +1,493 @@ +from gevent import monkey +monkey.patch_all() +import gevent +from dateutil import rrule +from datetime import datetime, timezone +from itertools import product +#from filename_helpers import check_valid_urls +import time +import requests + +rundict = { + 1: "short_range", + 2: "medium_range", + 3: "medium_range_no_da", + 4: "long_range", + 5: "analysis_assim", + 6: "analysis_assim_extend", + 7: "analysis_assim_extend_no_da", + 8: "analysis_assim_long", + 9: "analysis_assim_long_no_da", + 10: "analysis_assim_no_da", + 11: "short_range_no_da", +} +memdict = { + 1: "mem_1", + 2: "mem_2", + 3: "mem_3", + 4: "mem_4", + 5: "mem_5", + 6: "mem_6", + 7: "mem_7", +} +vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} +geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} + + +def selectvar(vardict, varinput): + return vardict[varinput] + + +def selectgeo(geodict, geoinput): + return geodict[geoinput] + + +def selectrun(rundict, runinput): + return rundict[runinput] + + +def makename( + date, + run_name, + var_name, + fcst_cycle, + fcst_hour, + geography, + run_type, + fhprefix="", + runsuffix="", + varsuffix="", + run_typesuffix="", + urlbase_prefix="", +): + """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" + + datetxt = f"nwm.{date.strftime('%Y%m%d')}" + foldertxt = f"{run_type}{run_typesuffix}" + filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" + return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + + +# setting run_type +def run_type(runinput, varinput, geoinput, default=""): + """This function takes the numeric command line input and converts to the text used in the url.""" + + if varinput == 5: # if forcing + if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii + return "forcing_analysis_assim_hawaii" + elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico + return "forcing_analysis_assim_puertorico" + elif runinput == 1 and geoinput == 2: # if short range and hawaii + return "forcing_short_range_hawaii" + elif runinput == 1 and geoinput == 3: # if short range and puerto rico + return "forcing_short_range_puertorico" + elif runinput == 5: # if analysis assim + return "forcing_analysis_assim" + elif runinput == 6: # if analysis_assim_extend + return "forcing_analysis_assim_extend" + elif runinput == 2: # if medium_range + return "forcing_medium_range" + elif runinput == 1: # if short range + return "forcing_short_range" + + elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico + return "analysis_assim_puertorico" + + elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico + return "analysis_assim_puertorico_no_da" + + elif runinput == 1 and geoinput == 3: # if short_range and puerto rico + return "short_range_puertorico" + + elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico + return "short_range_puertorico_no_da" + + else: + return default + + +def fhprefix(runinput): + if 4 <= runinput <= 10: + return "tm" + return "f" + + +def varsuffix(meminput): + if meminput in range(1, 8): + return f"_{meminput}" + else: + return "" + + +def run_typesuffix(meminput): + if meminput in range(1, 8): + return f"_mem{meminput}" + else: + return "" + + +def select_forecast_cycle(fcst_cycle=None, default=None): + if fcst_cycle: + return fcst_cycle + else: + return default + + +def select_lead_time(lead_time=None, default=None): + if lead_time: + return lead_time + else: + return default + + +urlbasedict = { + 0: "", + 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", + 3: "https://storage.googleapis.com/national-water-model/", + 4: "https://storage.cloud.google.com/national-water-model/", + 5: "gs://national-water-model/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", +} + + +def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): + if urlbaseinput: + return urlbasedict[urlbaseinput] + else: + return defaulturlbase + + +def create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date=None, + end_date=None, + fcst_cycle=None, + urlbaseinput=None, + lead_time=None, # TODO: change this order; placed here to avoid breaking change +): + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + + runsuff = "" + + try: + geography = selectgeo(geodict, geoinput) + except: + geography = "geography_error" + try: + run_name = selectrun(rundict, runinput) + except: + run_name = "run_error" + try: + var_name = selectvar(vardict, varinput) + except: + var_name = "variable_error" + try: + urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) + except: + urlbase_prefix = "urlbase_error" + + try: + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + except: + today = datetime.now(timezone.utc) + _dtstart = today + _until = today + + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) + run_t = run_type(runinput, varinput, geoinput, run_name) + fhp = fhprefix(runinput) + vsuff = varsuffix(meminput) + rtsuff = run_typesuffix(meminput) + + if runinput == 1: # if short_range + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + elif geoinput == 3: # puertorico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [6]), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif geoinput == 3: # if puerto rico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif runinput == 2: # if medium_range + if varinput == 5: # if forcing + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 19, 6)), + select_lead_time(lead_time, range(1, 241)), + ) + else: + default_fc = range(0, 19, 6) + if meminput == 1: + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 241)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 241, 3)), + ) + else: + raise ValueError("varinput") + elif meminput in range(2, 8): + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 205)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 205, 3)), + ) + else: + raise ValueError("varinput") + else: + raise ValueError("meminput") + elif runinput == 3: # if medium_range_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 6)), + select_lead_time(lead_time, range(3, 240, 3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 4: # if long_range + default_fc = range(0, 19, 6) + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(6, 721, 6)), + ) + elif varinput == 2: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(24, 721, 24)), + ) + else: + raise ValueError("varinput") + elif runinput == 5: # if analysis_assim (simplest form) + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(19)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(20)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(3)), + ) + elif runinput == 6: # if analysis_assim_extend + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + elif runinput == 7: # if analysis_assim_extend_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 8: # if analysis_assim_long + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + elif runinput == 9: # if analysis_assim_long_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 10: # if analysis_assim_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(21)), + select_lead_time(lead_time, range(3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + else: + raise ValueError("run error") + + r = [] + for _dt, _fc, _fh in prod: + r.append( + makename( + _dt, + run_name, + var_name, + _fc, + _fh, + geography, + run_t, + fhp, + runsuff, + vsuff, + rtsuff, + urlbase_prefix, + ) + ) + return r + + +def main(): + + start_date = "201809170000" + end_date = "201809172300" + fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + lead_time = [1, 2, 240] + # fcst_cycle = None # Retrieves a full day for each day within the range given. + runinput = 1 + varinput = 5 + geoinput = 1 + meminput = 0 + urlbaseinput = 3 + + file_list = create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + ) + if len(file_list) == 0: + print(f"No files found") + else: + #print(f"Files: {file_list}\nTotal files: {len(file_list)}") + paths = link_search_algo(file_list) + tasks = [gevent.spawn(check_directory,path) for path in paths] + gevent.joinall(tasks) + all_links = [] + for task in tasks: + l = task.value + for link in l: + if "https://storage.googleapis.com/national-water-model/"+link in file_list: + all_links.append("https://storage.googleapis.com/national-water-model/"+link) + + print(f"{len(all_links)} files found") + #valid_file_list = check_valid_urls(file_list) + #print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + + with open("filenamelist.txt", "w") as file: + for item in all_links: + file.write(f"{item}\n") + print("Success") + +def link_search_algo(file_list): + paths = [] + for file_name in file_list: + text_sp = file_name.split("/") + path = "/".join(text_sp[-3:-1])+"/" + if not path in paths: + paths.append(path) + return paths + +DOWNLOAD_LINK = "https://storage.googleapis.com/download/storage/v1/b/national-water-model/o/" +URL = "https://storage.googleapis.com/storage/v1/b/national-water-model/o?delimiter=/&prefix=" +TOKEN_PREFIX = "&pageToken=" + +def check_directory(path): + try: + json = requests.get(URL+path).json() + except Exception as e: + print(f"Couldn't fetch {URL}{path}") + try: + found_files = [i["name"] for i in json["items"]] + except: + return [] + + if "nextPageToken" in json: + return loop_check(URL+path,json["nextPageToken"]) + found_files + return found_files + +def loop_check(url,token): + try: + json = requests.get(url+TOKEN_PREFIX+token).json() + except Exception as e: + print(f"Couldn't fetch {url}") + + found_files = [i["name"] for i in json["items"]] + + if "nextPageToken" in json: + return loop_check(url,json["nextPageToken"]) + found_files + return found_files + + +if __name__ == "__main__": + start = time.time() + main() + print(time.time() - start) diff --git a/nwm_filenames/operational_aws_api/upload.py b/nwm_filenames/operational_aws_api/upload.py new file mode 100644 index 0000000..62fe09f --- /dev/null +++ b/nwm_filenames/operational_aws_api/upload.py @@ -0,0 +1,52 @@ +from concurrent.futures import ProcessPoolExecutor +import requests +import boto3 +from kerchunk.hdf import SingleHdf5ToZarr +import fsspec +import json +import os + +AWS_ACCESS_KEY_ID = 'AKIA4P7DSRJWW4TWOXVA' +AWS_SECRET_ACCESS_KEY = 'Gr9dS0Rrq8KmB8937honqzZDT06MXCy/j0H+VS4t' +BUCKET = "ciroh-nwm-zarr-copy" + +def download_and_convert_and_upload(filename): + + fileurl = filename.replace("\n","") + + text_split = fileurl.split("/") + filename = text_split[-1] + justname = filename[:-3] + sub_folder = text_split[-2] + date_folder = text_split[-3] + bucket_name = text_split[-4] + + s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) + + res = requests.get(fileurl, allow_redirects=True) + with open(f"./tmp{justname}.nc", "w+b") as file: + + file.write(res.content) + converted_data = SingleHdf5ToZarr(file, fileurl).translate() + with open(f"./{justname}.json", "w") as file2: + json.dump(converted_data,file2) + with open(f"./{justname}.json", "r") as file2: + s3.upload_file(f"./{justname}.json",BUCKET,f"{bucket_name}/{date_folder}/{sub_folder}/{filename}.json") + + + os.remove(f"./{justname}.json") + os.remove(f"./tmp{justname}.nc") + +def main(): + + with open("./filenamelist.txt") as file: + + lines = file.readlines() + + with ProcessPoolExecutor() as executor: + + executor.map(download_and_convert_and_upload, lines) + + +if __name__ == "__main__": + main() From 6588065b58c55215484e4e7c7272a34382bfd59c Mon Sep 17 00:00:00 2001 From: RohanSunkarapalli <58287801+RohanSunkarapalli@users.noreply.github.com> Date: Thu, 12 Oct 2023 02:50:36 -0500 Subject: [PATCH 12/17] Delete nwm_filenames/operational_aws_api/.ipynb_checkpoints directory --- .../listofnwmfilenames-checkpoint.py | 493 ------------------ 1 file changed, 493 deletions(-) delete mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py diff --git a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py deleted file mode 100644 index e282a79..0000000 --- a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py +++ /dev/null @@ -1,493 +0,0 @@ -from gevent import monkey -monkey.patch_all() -import gevent -from dateutil import rrule -from datetime import datetime, timezone -from itertools import product -#from filename_helpers import check_valid_urls -import time -import requests - -rundict = { - 1: "short_range", - 2: "medium_range", - 3: "medium_range_no_da", - 4: "long_range", - 5: "analysis_assim", - 6: "analysis_assim_extend", - 7: "analysis_assim_extend_no_da", - 8: "analysis_assim_long", - 9: "analysis_assim_long_no_da", - 10: "analysis_assim_no_da", - 11: "short_range_no_da", -} -memdict = { - 1: "mem_1", - 2: "mem_2", - 3: "mem_3", - 4: "mem_4", - 5: "mem_5", - 6: "mem_6", - 7: "mem_7", -} -vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} -geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} - - -def selectvar(vardict, varinput): - return vardict[varinput] - - -def selectgeo(geodict, geoinput): - return geodict[geoinput] - - -def selectrun(rundict, runinput): - return rundict[runinput] - - -def makename( - date, - run_name, - var_name, - fcst_cycle, - fcst_hour, - geography, - run_type, - fhprefix="", - runsuffix="", - varsuffix="", - run_typesuffix="", - urlbase_prefix="", -): - """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" - - datetxt = f"nwm.{date.strftime('%Y%m%d')}" - foldertxt = f"{run_type}{run_typesuffix}" - filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" - return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" - - -# setting run_type -def run_type(runinput, varinput, geoinput, default=""): - """This function takes the numeric command line input and converts to the text used in the url.""" - - if varinput == 5: # if forcing - if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii - return "forcing_analysis_assim_hawaii" - elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico - return "forcing_analysis_assim_puertorico" - elif runinput == 1 and geoinput == 2: # if short range and hawaii - return "forcing_short_range_hawaii" - elif runinput == 1 and geoinput == 3: # if short range and puerto rico - return "forcing_short_range_puertorico" - elif runinput == 5: # if analysis assim - return "forcing_analysis_assim" - elif runinput == 6: # if analysis_assim_extend - return "forcing_analysis_assim_extend" - elif runinput == 2: # if medium_range - return "forcing_medium_range" - elif runinput == 1: # if short range - return "forcing_short_range" - - elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico - return "analysis_assim_puertorico" - - elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico - return "analysis_assim_puertorico_no_da" - - elif runinput == 1 and geoinput == 3: # if short_range and puerto rico - return "short_range_puertorico" - - elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico - return "short_range_puertorico_no_da" - - else: - return default - - -def fhprefix(runinput): - if 4 <= runinput <= 10: - return "tm" - return "f" - - -def varsuffix(meminput): - if meminput in range(1, 8): - return f"_{meminput}" - else: - return "" - - -def run_typesuffix(meminput): - if meminput in range(1, 8): - return f"_mem{meminput}" - else: - return "" - - -def select_forecast_cycle(fcst_cycle=None, default=None): - if fcst_cycle: - return fcst_cycle - else: - return default - - -def select_lead_time(lead_time=None, default=None): - if lead_time: - return lead_time - else: - return default - - -urlbasedict = { - 0: "", - 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", - 3: "https://storage.googleapis.com/national-water-model/", - 4: "https://storage.cloud.google.com/national-water-model/", - 5: "gs://national-water-model/", - 6: "gcs://national-water-model/", - 7: "https://noaa-nwm-pds.s3.amazonaws.com/", -} - - -def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput: - return urlbasedict[urlbaseinput] - else: - return defaulturlbase - - -def create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date=None, - end_date=None, - fcst_cycle=None, - urlbaseinput=None, - lead_time=None, # TODO: change this order; placed here to avoid breaking change -): - """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" - - runsuff = "" - - try: - geography = selectgeo(geodict, geoinput) - except: - geography = "geography_error" - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - var_name = selectvar(vardict, varinput) - except: - var_name = "variable_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - try: - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - except: - today = datetime.now(timezone.utc) - _dtstart = today - _until = today - - dates = rrule.rrule( - rrule.DAILY, - dtstart=_dtstart, - until=_until, - ) - run_t = run_type(runinput, varinput, geoinput, run_name) - fhp = fhprefix(runinput) - vsuff = varsuffix(meminput) - rtsuff = run_typesuffix(meminput) - - if runinput == 1: # if short_range - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - elif geoinput == 3: # puertorico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [6]), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif geoinput == 3: # if puerto rico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif runinput == 2: # if medium_range - if varinput == 5: # if forcing - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 19, 6)), - select_lead_time(lead_time, range(1, 241)), - ) - else: - default_fc = range(0, 19, 6) - if meminput == 1: - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 241)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 241, 3)), - ) - else: - raise ValueError("varinput") - elif meminput in range(2, 8): - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 205)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 205, 3)), - ) - else: - raise ValueError("varinput") - else: - raise ValueError("meminput") - elif runinput == 3: # if medium_range_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 6)), - select_lead_time(lead_time, range(3, 240, 3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 4: # if long_range - default_fc = range(0, 19, 6) - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(6, 721, 6)), - ) - elif varinput == 2: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(24, 721, 24)), - ) - else: - raise ValueError("varinput") - elif runinput == 5: # if analysis_assim (simplest form) - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(19)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(20)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(3)), - ) - elif runinput == 6: # if analysis_assim_extend - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - elif runinput == 7: # if analysis_assim_extend_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 8: # if analysis_assim_long - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - elif runinput == 9: # if analysis_assim_long_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 10: # if analysis_assim_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(21)), - select_lead_time(lead_time, range(3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - else: - raise ValueError("run error") - - r = [] - for _dt, _fc, _fh in prod: - r.append( - makename( - _dt, - run_name, - var_name, - _fc, - _fh, - geography, - run_t, - fhp, - runsuff, - vsuff, - rtsuff, - urlbase_prefix, - ) - ) - return r - - -def main(): - - start_date = "201809170000" - end_date = "201809172300" - fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] - lead_time = [1, 2, 240] - # fcst_cycle = None # Retrieves a full day for each day within the range given. - runinput = 1 - varinput = 5 - geoinput = 1 - meminput = 0 - urlbaseinput = 3 - - file_list = create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date, - end_date, - fcst_cycle, - urlbaseinput, - ) - if len(file_list) == 0: - print(f"No files found") - else: - #print(f"Files: {file_list}\nTotal files: {len(file_list)}") - paths = link_search_algo(file_list) - tasks = [gevent.spawn(check_directory,path) for path in paths] - gevent.joinall(tasks) - all_links = [] - for task in tasks: - l = task.value - for link in l: - if "https://storage.googleapis.com/national-water-model/"+link in file_list: - all_links.append("https://storage.googleapis.com/national-water-model/"+link) - - print(f"{len(all_links)} files found") - #valid_file_list = check_valid_urls(file_list) - #print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") - - with open("filenamelist.txt", "w") as file: - for item in all_links: - file.write(f"{item}\n") - print("Success") - -def link_search_algo(file_list): - paths = [] - for file_name in file_list: - text_sp = file_name.split("/") - path = "/".join(text_sp[-3:-1])+"/" - if not path in paths: - paths.append(path) - return paths - -DOWNLOAD_LINK = "https://storage.googleapis.com/download/storage/v1/b/national-water-model/o/" -URL = "https://storage.googleapis.com/storage/v1/b/national-water-model/o?delimiter=/&prefix=" -TOKEN_PREFIX = "&pageToken=" - -def check_directory(path): - try: - json = requests.get(URL+path).json() - except Exception as e: - print(f"Couldn't fetch {URL}{path}") - try: - found_files = [i["name"] for i in json["items"]] - except: - return [] - - if "nextPageToken" in json: - return loop_check(URL+path,json["nextPageToken"]) + found_files - return found_files - -def loop_check(url,token): - try: - json = requests.get(url+TOKEN_PREFIX+token).json() - except Exception as e: - print(f"Couldn't fetch {url}") - - found_files = [i["name"] for i in json["items"]] - - if "nextPageToken" in json: - return loop_check(url,json["nextPageToken"]) + found_files - return found_files - - -if __name__ == "__main__": - start = time.time() - main() - print(time.time() - start) From 6da5c2658e84d53d188df8ef480ac293f7ed487e Mon Sep 17 00:00:00 2001 From: RohanSunkarapalli <58287801+RohanSunkarapalli@users.noreply.github.com> Date: Thu, 12 Oct 2023 02:50:59 -0500 Subject: [PATCH 13/17] Delete nwm_filenames/operational_aws/.ipynb_checkpoints directory --- .../listofnwmfilenames-checkpoint.py | 441 ------------------ 1 file changed, 441 deletions(-) delete mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py deleted file mode 100644 index 61552f2..0000000 --- a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py +++ /dev/null @@ -1,441 +0,0 @@ -from gevent import monkey -monkey.patch_all() -from dateutil import rrule -from datetime import datetime, timezone -from itertools import product -from filename_helpers import check_valid_urls -import time - -rundict = { - 1: "short_range", - 2: "medium_range", - 3: "medium_range_no_da", - 4: "long_range", - 5: "analysis_assim", - 6: "analysis_assim_extend", - 7: "analysis_assim_extend_no_da", - 8: "analysis_assim_long", - 9: "analysis_assim_long_no_da", - 10: "analysis_assim_no_da", - 11: "short_range_no_da", -} -memdict = { - 1: "mem_1", - 2: "mem_2", - 3: "mem_3", - 4: "mem_4", - 5: "mem_5", - 6: "mem_6", - 7: "mem_7", -} -vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} -geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} - - -def selectvar(vardict, varinput): - return vardict[varinput] - - -def selectgeo(geodict, geoinput): - return geodict[geoinput] - - -def selectrun(rundict, runinput): - return rundict[runinput] - - -def makename( - date, - run_name, - var_name, - fcst_cycle, - fcst_hour, - geography, - run_type, - fhprefix="", - runsuffix="", - varsuffix="", - run_typesuffix="", - urlbase_prefix="", -): - """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" - - datetxt = f"nwm.{date.strftime('%Y%m%d')}" - foldertxt = f"{run_type}{run_typesuffix}" - filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" - return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" - - -# setting run_type -def run_type(runinput, varinput, geoinput, default=""): - """This function takes the numeric command line input and converts to the text used in the url.""" - - if varinput == 5: # if forcing - if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii - return "forcing_analysis_assim_hawaii" - elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico - return "forcing_analysis_assim_puertorico" - elif runinput == 1 and geoinput == 2: # if short range and hawaii - return "forcing_short_range_hawaii" - elif runinput == 1 and geoinput == 3: # if short range and puerto rico - return "forcing_short_range_puertorico" - elif runinput == 5: # if analysis assim - return "forcing_analysis_assim" - elif runinput == 6: # if analysis_assim_extend - return "forcing_analysis_assim_extend" - elif runinput == 2: # if medium_range - return "forcing_medium_range" - elif runinput == 1: # if short range - return "forcing_short_range" - - elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico - return "analysis_assim_puertorico" - - elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico - return "analysis_assim_puertorico_no_da" - - elif runinput == 1 and geoinput == 3: # if short_range and puerto rico - return "short_range_puertorico" - - elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico - return "short_range_puertorico_no_da" - - else: - return default - - -def fhprefix(runinput): - if 4 <= runinput <= 10: - return "tm" - return "f" - - -def varsuffix(meminput): - if meminput in range(1, 8): - return f"_{meminput}" - else: - return "" - - -def run_typesuffix(meminput): - if meminput in range(1, 8): - return f"_mem{meminput}" - else: - return "" - - -def select_forecast_cycle(fcst_cycle=None, default=None): - if fcst_cycle: - return fcst_cycle - else: - return default - - -def select_lead_time(lead_time=None, default=None): - if lead_time: - return lead_time - else: - return default - - -urlbasedict = { - 0: "", - 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", - 3: "https://storage.googleapis.com/national-water-model/", - 4: "https://storage.cloud.google.com/national-water-model/", - 5: "gs://national-water-model/", - 6: "gcs://national-water-model/", - 7: "https://noaa-nwm-pds.s3.amazonaws.com/", -} - - -def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput: - return urlbasedict[urlbaseinput] - else: - return defaulturlbase - - -def create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date=None, - end_date=None, - fcst_cycle=None, - urlbaseinput=None, - lead_time=None, # TODO: change this order; placed here to avoid breaking change -): - """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" - - runsuff = "" - - try: - geography = selectgeo(geodict, geoinput) - except: - geography = "geography_error" - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - var_name = selectvar(vardict, varinput) - except: - var_name = "variable_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - try: - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - except: - today = datetime.now(timezone.utc) - _dtstart = today - _until = today - - dates = rrule.rrule( - rrule.DAILY, - dtstart=_dtstart, - until=_until, - ) - run_t = run_type(runinput, varinput, geoinput, run_name) - fhp = fhprefix(runinput) - vsuff = varsuffix(meminput) - rtsuff = run_typesuffix(meminput) - - if runinput == 1: # if short_range - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - elif geoinput == 3: # puertorico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [6]), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif geoinput == 3: # if puerto rico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif runinput == 2: # if medium_range - if varinput == 5: # if forcing - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 19, 6)), - select_lead_time(lead_time, range(1, 241)), - ) - else: - default_fc = range(0, 19, 6) - if meminput == 1: - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 241)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 241, 3)), - ) - else: - raise ValueError("varinput") - elif meminput in range(2, 8): - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 205)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 205, 3)), - ) - else: - raise ValueError("varinput") - else: - raise ValueError("meminput") - elif runinput == 3: # if medium_range_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 6)), - select_lead_time(lead_time, range(3, 240, 3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 4: # if long_range - default_fc = range(0, 19, 6) - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(6, 721, 6)), - ) - elif varinput == 2: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(24, 721, 24)), - ) - else: - raise ValueError("varinput") - elif runinput == 5: # if analysis_assim (simplest form) - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(19)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(20)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(3)), - ) - elif runinput == 6: # if analysis_assim_extend - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - elif runinput == 7: # if analysis_assim_extend_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 8: # if analysis_assim_long - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - elif runinput == 9: # if analysis_assim_long_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 10: # if analysis_assim_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(21)), - select_lead_time(lead_time, range(3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - else: - raise ValueError("run error") - - r = [] - for _dt, _fc, _fh in prod: - r.append( - makename( - _dt, - run_name, - var_name, - _fc, - _fh, - geography, - run_t, - fhp, - runsuff, - vsuff, - rtsuff, - urlbase_prefix, - ) - ) - return r - - -def main(): - - - start_date = "201809170000" - end_date = "201809172300" - fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] - lead_time = [1, 2, 240] - # fcst_cycle = None # Retrieves a full day for each day within the range given. - runinput = 1 - varinput = 1 - geoinput = 1 - meminput = 0 - urlbaseinput = 3 - - file_list = create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date, - end_date, - fcst_cycle, - urlbaseinput, - ) - if len(file_list) == 0: - print(f"No files found") - else: - print(f"Files: {file_list}\nTotal files: {len(file_list)}") - valid_file_list = check_valid_urls(file_list) - print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") - - with open("filenamelist.txt", "w") as file: - for item in valid_file_list: - file.write(f"{item}\n") - - -if __name__ == "__main__": - start = time.time() - main() - print(time.time() - start) From a765effb0e9e5739a113211afa757d3f51e44693 Mon Sep 17 00:00:00 2001 From: Rohan S Date: Sun, 15 Oct 2023 09:10:45 +0000 Subject: [PATCH 14/17] Test cases added using pytest --- .../filename_helpers.cpython-310.pyc | Bin 0 -> 1422 bytes .../listofnwmfilenames.cpython-310.pyc | Bin 0 -> 7837 bytes .../test_cases.cpython-310-pytest-7.2.0.pyc | Bin 0 -> 12175 bytes nwm_filenames/operational_aws/test_cases.py | 196 ++++++++++++++++++ .../listofnwmfilenames.cpython-310.pyc | Bin 0 -> 9496 bytes .../test_cases.cpython-310-pytest-7.2.0.pyc | Bin 0 -> 12179 bytes .../operational_aws_api/test_cases.py | 196 ++++++++++++++++++ 7 files changed, 392 insertions(+) create mode 100644 nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc create mode 100644 nwm_filenames/operational_aws/__pycache__/listofnwmfilenames.cpython-310.pyc create mode 100644 nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc create mode 100644 nwm_filenames/operational_aws/test_cases.py create mode 100644 nwm_filenames/operational_aws_api/__pycache__/listofnwmfilenames.cpython-310.pyc create mode 100644 nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc create mode 100644 nwm_filenames/operational_aws_api/test_cases.py diff --git a/nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc b/nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdc10240cf48e22b9ee0f906911346cd4c359433 GIT binary patch literal 1422 zcmah|Pm3c(6t7p^>F#8*vr!S(gOG!V2|}XaL6&h4)IBY8Fo$Iuq-nZpCY|knQ&pMl zgwAOO)RTA?AvrF75kEs6^)z2VWEJ$i?nIUy&}!c6SFhfy_osfZymzn1aQ*S~2lA5+ zW53bn)#c#kGbDY2OfUf%3)?@0fP0?t+zA~Zos+q_8@eb5!WEqx2s^?P{tXK~0aG^W z-r^nXbOhF~Vr9}eL(w%CB2PG-?3RmvA_}b^Av4T?;WfJk`qtby#;y4^|BiLpH|z`q z2!0dP>_LsHN6vSv>3wcWb8@emYe0Vj;EB z<7sPiG)e$d*e}Y{LwXBIKR|ZQYW8)o2E#V+EvsS6f8ZB<%3iP$x8AwDl!dV!UB$}+ z6@2`m?VhEXjEXpyFCaWjW-@seEmRg!OBB`payprnxtz?)%d5DUh}gtYoFte}B%jB5 zl}SA*mU%??&^3BeR#L?VD~huyUh2vIB$~+#kMy{@vc2Oh)g~$P>g14;r%E2M)%$G% zmvNQ~N<)v2voeV@eS*T?;Z6%E%_-e7MAo#RzlIt%Y>PNpfa0yZYX#7gJLl4jxbo4# zx|Cp3Ys(0N>HygQsmW}}M!q5d*p7;eb7>u{RZ-5i`)|t>0+v~SUZz-hHgb2ZrL?WT zdr}L=-fydG`c8*!&@MVFifjk3_kfbrePjct2OQqyJvapQ4l3>dH9+O(Wd18Ms#?Gn z;rK3F^VMSm8|=guzCT_&wKI2w^Ng!;%{Hz;0RPB-;sjINcSL6=d0U1Wc)~b#(#7u@ zXX^F^QzWvO;;jbe{yey<+LQmk`)T91#8)|e36tBu zaozrc6|ek9<)RP|hqp8(tAkUVuAOGoCERDJvB7?*tUHsjum{@2W}%~`6f(T8rHO>p ziApPiu=N%dLSBZxl4nZlneEBviEPevIQXia&oB3?k*nUu$civ+gFWymq^`|E0)F5g-ti1UmH|GKAZrNK9E~xXPuXym!yBALiQ=6@G`z&PnIMmG1-;6vL(xs zBR^6eOET3dZmz3Zs(^EDzMHr5UCq)&S#=9t-O>e|cZ*%aG6bx-C9A{=OlL)Auo5e? z3ahdOR%457>0!w#vt_ozR@oXm#p-OGoqkxbD(nn9%g(VU*m-6?%v)7A?P?0I$*6m>wi*b9Kx*|Y3L_G|2`?AO^#>^IoU>}wBo>ojEjCi^<@XNp?- zvRBz{_6q9GvODY>kax~`f_?LW!vD@GJt{qxtn<#f5$4V@ld^NZE{!m!OuH+-#%p6* zgN7VyUEh1)90JO7-gljb66?$kov_n&Vx7`~?>RshdfaFIb_lT6-gDXyTKl%!VXZ!Q zgYCE!?D;%waogKd|t` zc{g-C7N410oyd!qk0D8XygsLLLcekhmGo*cGl+z+Q5s`cN@InR93?6xc}g@&3Y6%S z6e*#tJ}x1_26eivt4Ve($*w2ajU;<6$(~QLn~efr03WYWvPj7iCCikoP_l|7Hrji( z=Q(bRhnDKv9xcY2 z$VBqX?>6`RuG4(m-#@gyCbpX0vfFJZ2(Zw0x92)R(>v(4b~>)(*fxupe-NE z?@J#j!*Zm(FGYrcD=Zh~nEJlT@)59+{yy|TqyQ=kr~pW11>m(Pj~e}h>aZG>P+LZ+ z7*#}Cg)C^0hB5Hp*y{x5PTy*FtN|}v9$uT6Pg__ z^v!V3G3{Q@?X>Otu46)ln5a#*L5k#lk9VMmOj_oxfrboHt@?D1c09c^G0hpbSWSi+-`+?a0OMSqnG{6@pbQf7+TDLi&+hqEb_Zlk-*Z|Kh zRuVADe~Fc+e79KzhGu2T^ZG>2>#Qp37C>)J>MT~9^07%BV~e7033T41&ST4{!x$Q? zgYO;Pej#Qn5F5wqcO7UjY`Jciww~MZ9PBx$prbo+vI)f&?bBxMc!D2mwv{MdQpEAO zi5hJkvlofV6nZ+oa01OtoeE_dpHAtrRwfY?KQRwC6A-VZ_@n(0mf^%sP7LQt%A2*r z@p4+5>CG1`=Bx+2PSbJzX!ETZeqvYGGSV_wzE0d{=OYu=ZE*w|DsO<4U!deEVk$6a z$L_nwPYAyO1BEjC)SQsfCi|W~1e28)8r1|q8dXKr z1DTg_h~(rD!H-h0z40~j+lieP)1!plSl&qnA}0fx+C6cu`E!u@0G~M0OX#EWxvq%X zFZ6?sHTC0@vq&N-L|>RPQuZZYhAx*#tIzr%T!%Ff#&V03(fE-3IdaAT=x{~m*%^Kl zgzw<{gj|`bY{;r2>Q3wn|27C|%+vNPwFT*-N&YI3e4CPEqrQc@cd7n$N~rV&b4fEV zMoR`RlDHbdpNQnUN9;4dM*QEP+-$D7rmicd! z+^jcHbRCjSO7+Ji;4UTKMKZXx7lysy#b(px z_3sPkr|GrZJ58u7@_L%&V>btP{y&_45MG|z-sW4c-E9tTq;lXeaeLQ6!~QP1+v@~d zqQ!|@b@>4Hkzelm%yFB8=Z|x>UBAyJykEk1dRGi~uFNyL?URvDr47>LT*fq9?qRxu z>sxl$#^nP(pJ=tAbJ``xJkxn@a+}U@`Vc%dkEGNinQN)X8sQ9O);t%NCPrI0Xyr*k z93ET-6HPR!JiY}h$#$4ve}aYrI#xmrDg1WeOC4o~zICj95GdOiv7C0>|Lor!u3@O88sQt_j+g=g=-7__08(i+Pp3pMAjpz!@ek4#k)IYVOSH0R^DiMM8Smp%j+l+|k@;Fc zuqG?-7UZ0r$T=tE{Ogf9OlR&NQkj{Vt0Ckg)?zXnOXxwec0Zp(y#(r|bnd+WPRD`u z#-4sp;g`UjKugK~MxC6>L7$d0dvSqaYSsJa48{4m6w{tVMt0xLWc(C;sEIz@N~JL~ zb3QIiTYC)3i-Pja$!x>rwdt3oDCJ%3XmFh&8i-KF@E0okJA&&9C zBYBC8b|Qu7lA;hL0%I2lOCIm|9|`&`f<8tvxaoWD;bbb({)-v1&E%$G(jA><+C&6n z7Ecq7MCT*4Iu>1`rp9EYMdXd2B1rU*0&(I(GoYz5#Z)y;izpD^ARL8i_%oDTqQs_2~#67uUsbfQNK73VzvfKxm%&h^7KCvU~d-~QUSUcD!-C*p1(oO0neb8;z!AIo1O zh#bw>c++P>7#hVf z_;sqH<>6wN( z1wg|U02eO2+Go|gj_V;_g;J`kL4y97o+sE4u%ap8o7E;1vsxo>WH`&30Y2i^1&<*l z>5vPlTJ1}4x{`9%Ce#MnO=S`POqK>&M4e#Epwm7}dUO;@I7p5L?u0ri5q~n?EUqRf zfkR&in@HBRl}VY#PHpRoiWyI4k?7{UztBHdYy2IoQ5)a%p`%a!ukd$c0PuIovxOFA zxxbKy3UU>Bj^&34QV#R@G^Py;tT5C=1Sg|>q(y~^kExIJVG*8kaU?T}U2TR6)9D9? zcNS1l1T~EWkBhQO82HYh9vZ-xC@9Lxz#Ah9l_q$Veh`h6AFmC8hgLTbKrKDWivZ}t z2MDUl(mSueh2K#s-cgQzGToJ~O2N(nF!bIdUmbOg+IC#NcIE1=D=%EVc?Hj`bnW_; zD~io(x_z2UX(@KeXKhiE;%hr^Cz{Iw?~$lgMiXkZq#l zx`p^=EFZ+O(a^-ch;u;3d11yl{XmIT#Q0Hjz&oMSsBnsqaF-Hs;U|SOPm^&{S8ghEf_f~D;`F#jPZV`mk6cG%u) z*{*Aee{g-Wqd8!N(MYUB5re^B8s80sEwH9j-P&_p{3!`0Gb+LsWQ42&0yK1;O$?E+ zP?t!i_<fW$izdYPMecuUZk*IbGFNrJQ1nUl^zK4XCDqfmhMf a`XtvrHOd*<|CGM32l|sbjKPYme)d13(~Hdj literal 0 HcmV?d00001 diff --git a/nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc b/nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e260453176c33de2ba75b7f26f76a97ed49658a GIT binary patch literal 12175 zcmeHNYm8jURlc{Mx1aMKkKg0xy4Rjp_lzIQ+U%}38^R_I?6MouWYv4!b!VpSe!1># zd#AZAh4qSTkU%SuACWMU9f6PqN<<`r2%w0kKuG*SNa+3y64Lxb@;~zk-#K;fo$j9L z^^heINOY@CovJ!jb*gUFIj2rlE0;@a_$7buyft@7)Bc$f{l76JUP7?%>Y64rq1Uyt z&T*s>jcB^oh?V2IDoK8 zHO9;1jfwI^W3oKim?}>-rpwc)7ZdRhwDOEdh$P}!VTcss10pRli07=tt%DB_iR^|Z za^KJ&MMPeVeW2af%ZJ6dm;l8QF)5}H9~IML2Jta5D-Iw&F6P8R#Pi~iIE?rNS~&6z zy?he!QN*VZA47Z^@o~gwc${a;XT`iY0s80so==KX{MW>3amG3+&I(;Tx1q16#X0;7 zC_62lNB$}6lsM1%1>~Q_|AJURegXN5l)n)L_e)}#xj$zW#LDf6{39#z@cBoWS)sql zp-|kWFLI-xdxkI_%c(W2Le@*!R^6&PcTE|f8&)eoWvA&G4fD3uG#i#@AX#y?+Q8N~ z+tOOEz2~J+(eA7hnh%Qo^o(k`vfh$b)wG>Tb*oypym3FfZkeJ&On5n!(~^_uNeiSOW-1Y#guM|kob0L)sXruPu{bVqYFSHBrW$BuRhkhSI(SkZ(S_d>0k7b`K$AUEf?%_6c-Ej1DJ?IH;>yiVt?tyCwwI{4Kw*1x6??PQsS75pY}8vf&3c6wOsZm=>lU@( z|-ERm--)i`i(`EqkeXuffZYMqw|t+LmMn zF)s6-y~Hzw6>Zsz?JY0Q$#54ow5JTi9C#ooI3*nl&5?W zKs8N_iE$_PFwR2g9nd5kh#7ek7{mw*q48hW#e|p)soG3MqN>AGN>ojPYKqb%u7FQ7 zjuwE=Fb=T=e3o&DF5m|kk1;;Sc%1Qrj6;Bd<`Cma#t$=YFn)yb6yrx3Pcwdu@eJd~ z8P764&v=gU6O88xXKV}tl{g>w;|V{W^y4W%o`%3Z`5^JzBxYwxJ-V|jat}BL>ZjZT zBSdbL#4bV{A%T!YFc4A*X@m?y79od_M;JpGN0>mEM3_RD=}oiPK69h?FHk|eX(b78 zSt2k&U_j4Ns>x9j9%-u(agMmqp0ar9xbi8uL8zB`r!9a1?a3# z`L042%USw@3HpLiwkwj<0AVEpYXmMySjS45v*s<8N&l`fyu|Pd!Qu)-E-i9tkxPqQ ziZLaqyu{@tE-!I;iOW~H_UdZ4U)it1o}fyI)65w2DfcOv3v=Z@)m-e?xvTw&i?Tpt zW<$YOS}6^*QW|Jw zb)c2iQa_)oew)eySzS|Qe(KV&GS0E-5ZKqG%A;Dk^XD-Z~nHJy?+SExTru?O1D6HqCoxtyYLhB4w~01?HU?LErAcCT_`E zwS_vL3hU5W!%~Ai} zp*@=08P*Olw|Z?62ThhhwFifCU$&d_F?P!XxBql&d3TfsTv* z5wib~xkeOZpUD@9f-pK{ibUAPYfM2F;Ok7GEI>FgLW(ai1#CI_C8hv|Ht5KiaAC=D z3@c6;VU0G|fW-%Uk*I*72Am-elCYza~sM8oW0;LYzDfk5z#Kp=Qy!vZ0x^zBa& zPkfMhk+7=-t`T5W+kbkZPWG+wQu2#LUMBDh1inPzB?2!KAVnw76SzQN=$xfQdtgt{ zLC~h9_n#B%nm5AgQ2XYh6> z}f$jOf zV+-Vbk5{C!qMSz+tOa%UeU~tk0E=!mRU(y3+k8-_g-aTHzYAC1B;uh}shoA;(T{qN zg1{|+kqc5T1d2s@4n9vE z@Gu&{28Kaug{JR1!jpG>)j~e8K3BOu;41%|FDOU3%Fl*^^89`TC9mX9;3`)hygqIR z9s9ra3~qZNx9|TrxK)n*EGAnyhj#qAY{l;Kj&klk&)|Fda}V`-pNDQ*-l9)VM#nex zZ|kxRWaP?_O_b7=#J?gQ=A1{fi*wHXB%IIma~@s>vAK1JxUh#Arfo(7fR?h|<)!?8 z?&c@Bt4hJHgY6U?Htv&SI^9dc6`h2gXSjy_Eq`Gfu=6sMKV;{95H9Sm?t5Ve2L!lt zMfO30ZmUEN?&t*DeJS7WgHv~x-N!dqaDy5+FpwQHri8e>jBlZ&IA2=iC^@)P_&Dt) z{Zm*$?_MW+=W<~00NHKtDE}W!A%(zm)n@^b->{Gvwtfzx=Q12@m-(h-!1J|&V~6}n zltcV7gZo^y@RRLWK4kPLlmB_bcI^IFsN+80*p+^l&&uks0-Eew^?_3nE%4#s+xp+& zkcL!YPVGT9-1sQ_Y2Q67zSn{Gq4>7Hqun%X8W z^)`v5ZMvn}rcbs_Uu&C8$!#(v3sL3Q;7%oF7_7Ovmje%Uv(+#Kd5f*~VzXtMiyN)G zOYP20zDQeYR;%kvw`(>#!qwfMSzow^OJBLr zXbG#n)V(-dt6Fb$aNW&yKc?QH4RW;a-R9wq>gd9=L5DP`1+c%+#xU*DqS-LJttRgM zxYzD00bR8wTboSSHKkmrw%S{}$@gTWQ1bGFz7KC^FkLb0rhVJ=4)2+*TpixAp~7kl zm+W*!>C`tm^>y#)h;ro|8(F<=R%`3Es&`^U^&P*`F0PeV$U8DrQ)LHZI{9&>{?N|s zO*1^gXOIOW8JvTqN{S=vltz|} z?q_Xeor~VVy?9e=Y;vGTP3|B$=LO&7vpSv_>Dm`@&;F)%5BlRq?IX1J zS|J8+o^_{#`%k;T#n^uJV_; z4RT_?#%(Akc8saAi z+cV6|_y}J9J>cadUGfb4klg$u+y4_cx@&!|MwsgULxEl9{La-qTk&4HX5;agW5R&+ zQsEOnFGdgjaC^f}%^i+C$fX-bZC>8=$GIof5%{d25vv@N8W}sx)UKLAL};? z5&2!vcv0JtkT8+OWxr3_e8UnZ_2P!r#2%H`{t$KeeKLem?;X_B{%d=Yd)QuLdU?E)R*IRXA$$6R|;GL$JuQli2 zdgHS%%y$>l*Z!O1Yi{it4t+ZwPXD+2PB(FSbWh5XRX5}w%lTFDmttx zw33(J(Mn#nUn_a3P%EkHsXrzT$yKi&Whe*znmWhwO!1u2ba~4EGQ6z6i6Wa0i*w1Z z>vi4G-;WrvU)EzkkpDoHj7p_p)|!<{;k;)AFF|?Myn_|>c63pV9EyEWTK8nl!9EYO zyriGU^Xny474=YNl0-mj9Y-$`3$y~&b5`}}F^=xllNEAlsw)gW_mV=B>_=ml6rFq3 zS6w2kNMMz~bpS8cFy(D8gV(HR9^0d9@ogO6N3aiAHi))Oz$QS)ac^R0{?#?I?Tv>M z@8`(BBl5o`@Vf-a7zyW$b$x|;uQmI`PPleG<3*f%J9X9GRB$_F_LpS{bT=;P8efZe5?Ng5kOymV8z$uVl-%PJ_EJBlherC31%9 z8SZ+Cq0g6fkQ{>K9KZ>Zvy|*Qxd$gea8G&5OCFM^An=2OU;qON?uk!v;0xjyag?um zINaGa<#WQt^lx=_cU5(DRrNagd`80Wv%ma?^>0r}($A?9|K(752A}(mEJ;jaa!YE< zhAc~#yrpbQGPNl!$)H?qscp^BM4Q%1v=c_6oivi7o@k}ox}gu@(?(j1%NQA!WGSYz zG|R9o%dtEwup%q5i91;%%gSt$O|fY;OA>Ct>7Sg+0g) zvBP&%qri@^qi;(_ksV_XAuq9qSrz$2`jP05z0AJBj=wD%W%dd?!4^@PWHojYP*dy_ zJB@sronen6pJ7+oW9)H2%%b!JJB!jBdxSm7zR7-tJ;k18=h(N{Gk0`j9=M)m-$wsJ zT8aMHtL!;;9&P*B1@=7f?PuR%FCc$_T{I4`Uo{T07mW(L#9l(%gKUXiMt;aT%&y#0 z_}^LSt@O5J9I^Iqg2vX-ux=fhlQuznrd$o zBvl$Hl&F+wlq4ugQj(%XrzA}YiT@yr1j1^y>!-r(beNq9vq!`1u`qi)%$}&Gcmd;h zk&+T66O@!GnWSV2Nswu*nzn7V>fAH5mT5C1%`MmBYfh6J8PDR}Y})7yQp*l+H0_mY z;=X8uM8mPWu8~|d*Ue_r$aK0E_Z;4AI8}Aes0I-r`cav_=kR?>d-o+@o|mplAIM7| zNRXA(n#oxcQlX>Jv^!mo9{`w0KEGaDb=p?#rn9zT+BHa(SvMOE%XPtMv)yS~Zp~hA z*O!|u%QoAnIUS3eUeke;)XjBBNvBzBHC@kH2HXg2v9l4RU8`j^JWP6&mPTvTWqt_R zeb>+T)50F0gqBE)U$LBU@hI$BJbw@{(KP6;(zwHv97FPdnS^)ka1s>0Y7%sx7)?Uc z1yX~wX!Ij-A-N%5xT{E*>`PlX&)f2Q(p{yW^ONsMenyn@O!ZZ!y{EB+k6vGYPhv@5 zK`AXtDU>vpLVwavphdr1=okDfT63tS{k%vEz?HlzRWm*3)zzk3S?<~m;!XvU$-uoz zhg;AJ#8Hb?Je=MNIx3L0HHep6X?m5W?Ku^1)vB1CPOI54Z?vpRm$xct4Mid9?-bM$bmRGU8<>nhf z8kpSfvM55}hQ$KmC&;LdZQU?ks~(OrQbLQq;b9`D#Xt`fQ?xl0cFssBW_sJaC62%; z;w_!CQYj}tD$mLMNmLr7iJ~8sqa*453o5?MWT^h}i%3uiD+iW(8HteA~-TU5d@$*0}R|t;XZClV{5V>}nM6cDf zEyx_y&~8Z_3ZW3uE(vSX7URLQ#Zd9m=?Wegs?z$NJ0nz^LRSY5-Unt!r3!@_EJSee zn}MoIsWOW z$3N{}Nd<(m^J%KU(V#m%`cau~dZAG(D3C_g&~!`YSsWrcJVdaYG>A98YGNt8s|9%! zuN}zC;YQ?eBcr=doNN9laJKP@BVEQCm3z7(TEDaoI@Yv~PwAHOB@b(1%BHd=@f>uy zL|T1k4Z?hw24Eo9`6I|bAoGn3G8A-}BjfHFPNZ}3eM;s`LC(mUBHHe|7Jd66!=~`%tGn)JPTG(^Ps#2X zg6Z(~uS2M6T@(I^7{-=PV8996|jyA$Wz7 zSCRD2u6ka_{bsFZJ8hG>xS6cZqV1UG;)=6Y>vV4j1E^*<8p}1PE3$lQWK!39&;Nf$ zI<9wObobV-y>zwq`J)jY7);z;u`sc-g5`Fa?xL7+Xj+|E$Nk7pv>j%(YQ4wzj%u`= zE*p;hig61oVzXm$?ptZxzw=l$Ara>U$Z(=k=^ zN9I2acZ-r98=1u%rCoDOjaDVw9|}qci2f{0cTf%E&jZu+nuJm{!sO4!b|1&^Gr-OW z*k{JT77-39((Hn+vi-;2ESuewe?#_F%$#9!bk$&<{%FPZ-44I|3^eS|Yc zxe|gvf)_(-l(B-4cOQ>|E(5w8(a!$wXdmD=WcnS2AI9jgR1W1EZE}PMYnqJ7Vv)); ztNqV0h=XGxMk@!7xZLgF_-m}8B-U^?!p17G^YP$lwrxNj5|FPCsjcu=e#w~Micxq@ zjJ2Z?g3{$({K=2zx`nw83-}+8=K7tlHF7C7;}L=3uSOU~8US-29ZRoTZ1gcP`lIly zgC8G_@O&6s$(PUlNcY%!gbn#aBxfs9f6kvdFMWURduo5aztG?3&uz_l^ZvY_^>hA$ zzt7M3Wq;C7`&0h3pWlQIm8Ex-+aLRLFdZN6^Sd2wghL+su$~`YZeg{T37zqyFC2ZTs=&SVT+c*oD$%vQW-Gj= zMF>lTwdm>|mjSBhbMCl457&|dFVMsJT#H<4fw+(30frt=MmCsN@ zR{nq{6%I|0NL3M}sxF8q5I;hFA~N(4l@3vIkP^E8`RkOBT`!^&9b#0V+Ri%1^)Cf# z*K2UHR*b~?XD?p5F5DC08wjIZ*v*_w3Sr0cOH?95GswJdwz}2@;$DzMSOpPN5hbYZ z-{VXPDg_zACPZQ>CIVPd3Qg;4g!3vABZaL|9D|>xrWz&0BYu`T^?JnXWL+>jNO%r2 zH-dx^EF(c8gy;+cCPre7&=^UCe~_RP=OE0a)Xl9y@pk(!7Z#+ zL<{Cgj3aDM`U((;N@kdydRw%g@g|G5Pu4#)K&rp z{R=%o<%}pRNd@C}T0@AP)=VN38<|UHFpfs+VoXM0(t#IPlclfd)0JpsX$YOcyb&%M zzk@3YTm&8V<^Y%cB3#k#QaFO}Br!UK4r??%+;6$G>8TURtt zJftGQjrse9eorg$+u%_H-{?o0fb3sk@0P(l*t=xeLW?r>eYvk7*O03$(T6?SPawyG zP9IM?eceNF(ogtFKQ**5^-aB>hNYa|lvxVa;Bil3D9e)l7;I#e`Wbo#dK=--^JIr+ zJ#8!DXG#A;Yv=qdwDxR2@8@qOVbK;)EB1>l=ND%Y5QU5Vx}V!hg>@Qp`#ZOvf%Pr> zxlII5S^lm8-%joHnUg20b6?m05qfb)V+m!&4G^DS zM=~JieIPU`acDs5bRZPIK>RfT^m6gF_e#;*0zoYzKpV*Gft)c?SG(f9qM8vB9;kSz2omB#4oZkM-m1Hn z$s4P6v$f)Iig_5Rn|MhuTdg30>lv>sH_;eqc-X+eb>8%>YMI|c8*fo^iV~vIIMvNb zKL=@An^1(98T)sQglD?9TqBKH&~BrsQA;4V0*&f{MneJ(lePD1SB=Q>F42juP|}d3 z5h)M<6lL!j*FMW-GP16UfDvGm_()GGZz=y%)^eIIml4+5e=n!<-vi2BazGay$$`)U z=TX3EM2HP%XJ23O6+B+Z@H15=Z@~{?3R8bmnU}U@CeKQJ4LU$xlleg(55~SGP8V8m zZnPzS9JmHw+`D&gfbSjz2!hjUdVxmN7c|FfmlJ#VyCflx3>0KNlbA(i1!i_baT7PB9M?Q40B z)yl4Y&?%?A(AoKvEH`-(Z&p1Wt_#Gs8fxk=TKS2`YUcLO{g{AtHS1vq%@w7pG+Hi~j{`vlSu$ literal 0 HcmV?d00001 diff --git a/nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc b/nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc new file mode 100644 index 0000000000000000000000000000000000000000..272e3a1529e26bddd824bf856b7705c585772078 GIT binary patch literal 12179 zcmeHNYm8jURlc{Mx1aMKkKg0xy4Rjp_lzIQ+U%}38^Q*M?6MouWYv4!b!VpSe!1># zd#AZAh4qSTkU%SuACWMU9f6PqN<<`r2%w0kKuG*SNa+3y64Lxb@;~zk-#K;fo$j9L z^^heINOY@CovJ!jb*gUFIj2rlE0;@a_~m}@yft@7)Bc$f{l76JUP7?%>Y64rq1Uyt z&T*s>jcB^oh?V2IDoK8 zHO9;1jfwI^W3oKim?}>-rpwc)7ZdRhwDOEdh$P}!VTcss10pRli07=tt%DB_iR^|Z za^KJ&MMPeVeW2af%ZJ6dm;l8QF)5}H9~IML2Jta5D-Iw&F6P8R#Pi~iIE?rNS~&6z zy?he!QN*VZA47Z^@o~gwc${a;XT`iY0s80so==KX{MW>3amG3+&I(;Tx1q16#X0;7 zC_62lNB$}6lsM1%1>~Q_|AJURegXN5l)n)L_e)}#xj$zW#LDf6{39#z@cBoWS)sql zp-|kWFLI-xdxkI_%c(W2Le@*!R^6&PcTE|f8&)eoWvA&G4fD3uG#i#@AX#y?+Q8N~ z+tOOEz2~J+(eA7hnh%Qo^o(k`vfh$b)wG>Tb*oypym3FfZkeJ&On5n!(~^_uNeiSOW-1Y#guM|kob0L)sXruPu{bVqYFSHBrW$BuRhkhSI(SkZ(S_d>0k7b`K$AUEf?%_6c-Ej1DJ?IH;>yiVt?tyCwwI{4Kw*1x6??PQsS75pY}8vf&3c6wOsZm=>lU@( z|-ERm--)i`i(`EqkeXuffZYMqw|t+LmMn zF)s6-T`}9WC7va$YRg`1Z+Ur6hC68r-8_i^d`Fi{h(<18NpcZOL5?H#;?-8OW6Me4 za+*Mlzzl)I1dagUOL6W|$~Z>gIDvTrCkUJ*aEicb0%r)&uoYMYHoz@5`vEFlCQt;} zC$AZNbLmv1+1#-DIF3^@yEx|D|0#|kWq3B2#dWMMiQYcTOPDUhpS&{7Jgwe~3RBsGn9AQGF zdy|gtCSAi#xoJ1!X5E~dcgNgucfy?%nP1KJrbM%uv;cgDafmM9vy4N00YAWajPW_fb#&e9HU_4JaV`C7i#QC@%Px$eqA5Z!5G=%QS2Z`S%Q9Da2(w$|od%!VJ zKjj`6A$FrAdJ*CX34|nqfsjH-BV-V=2swm2!WhCh!UV!3!W6+0;pPRZg~2-7q5_Pcsjl5IBoldr6pg+EJCI(72=XqnU}(vR!UQ* zs1dTIyu7auD@xOLt-V@+pjs?07mLd)%PUKMuiPPtutjHixx3i6QMqJsN8zaS`ttG$ z*n#L-Y*u+M7j$zC<04hF3LT-;5Me`lXysB+NSI#;GFDQhKuN6y2xyerf}&CgX&@g9 z$yY!RrRHacq{Wc57?Kvlri)?QMXnLpCs%0Y6I^Q`k37St321*@vEoVbM?5|&9P(-2eIiLkcNehTYRz`XxiF8-=wmQSd<@nh4kwnV zv^oxm3i(28o2`jgQF)v9*3m%h!HTSJ*)`Z_wq0vfHqCoxtyYLhB4w~01?HU?LErAc zE^f(MwS_vL3hU5$! z%~Ai}p*@=08P*Olw|ZFhLW(ai1#CI_C8hv|Ht5)y zaAC=D3@c6;VU0G|fW-%Uk*I*71{@*}lCYza%nm5AgQ2 zXYh6>} zf$jOfV+-Vbk5{C!qMSz+tOa%UeU~tk0E=!mRU(y3+k8-_g-aTHzYAC1B;uh}shoA; z(T{qNg1{|+kqc5T1d2s@ z4n9X6@Gu&{28Kaug{JR1!jpG>)j~e8K6kl3;4c3eUr>&6m!Ay<<@x;xN?ysIz+J99 zczxUsI`@C;8Qk_lZr}fLaI2jASxmNa4(<4J*^1re9p&78p27F@=N{_wJ`dftyhWd! zjE-;W-_~Ut$jFr;n<%AgiGM{r%sG!{7w4S&NjRVB=RCX+Vsq;babXWLOxuhE04-&^ z%S-wH-0e?rUzLJg2iqw)Y}_Zubh?*>D>?}~&u|UI0`DTHwRM zxAnilAq}a*oZ5qIxbac;)5zVXy{yjWpC^o!7wML5+7)cu$+rn>Q67zSn{Gq4>7Hqu zn%X8W^)`v5ZMvn}rcbs_Uu&C8$!#(v3sL3Q;7%oF7_7Ovmje%Uv(+#Kd5f*~VzXtM ziyN)GOYP20zD!$cR;%kvw`(>#!qxqsSzow^ zi(k3WXbG#n)V(-dt6Fb$aOKT)Kc?QH4RW;a-R9wq>gd9=L5DP`1+c%+#xU*DqS-LJ zttRgNxYzD00bR8wTboSSHKkmrw%S{}$@gTWQ1bGFz7KC^FkLb0rhVJ=4)2+*TpixA zp~7kl7wvRS>C`tm^>y#)h;ro|8(F<=R%`3Es&`^U^&P*`F0PeV$U8DrQ)LHZI{9&> z{?N|sO*1^gXOIOW8JvTqN{S=v zltz|}?q_Xeor~VVy?9e=Y;vGTP3|B$=LO&7vpSv`>Dm`@&;F)%5BlRq z?IX1JS|J8+o^_{#`%k;T#n^ zuJV_;4RT_?#%(Akc8saAi+cV6|_y}J9J>cadUGfb4klg$u+y4_cx@&!|MwsgULxEl9{La-qTk&4HX5#^w zW5R&+QsFZ{FGi33aC^f}%^i+C$fX-bZC>8=$GIof5%{d25vv@N8W}sx)UKL zAL};?5&2!vcv0JtkT8+OWxr3_e8UnZ_2P!r#2%H`{t$KeeKLem?;X_B{%d=Yd)QuLdU?E)R*IRXA$$6R|;GL$J zuQli2dgHS%%y$>l*Z!O1Yi{it4t+ZwPXD+2PB(FSbWh5XRX5}w)mTFDmttxw33(J(Mn#nUn_a3P%EkHsXrzT$yKi&Whe*znmWhwO!1u2ba~4EGQ6z6i6Wa0 zi*w1Z>vi4G-;WrvU)EzkkpDoHj7p_p)|!<{;k;)AuR(d%yn_|>esocd9EyEWTK8nl z!9EYOyriGU^Xny474=YNl0-mj9Y-$`3$y~&lUDWUF^=xllNEAlsw)gW_mV=B>_=ml z6rFq3S6w2kNMMz~bpS8cFy(D8gBPu69^0d9@ogO6N3aiAHi))Oz$QS)ac^R0{?#?I z?Tv>M@8`(BBl5o`@Vf-a7zyW$b$x|;uQmI`PPleG<3*f%J9X9GRB$_F_LpS{bT=;P8efZe5?Ng+=9em literal 0 HcmV?d00001 diff --git a/nwm_filenames/operational_aws_api/test_cases.py b/nwm_filenames/operational_aws_api/test_cases.py new file mode 100644 index 0000000..272c4c4 --- /dev/null +++ b/nwm_filenames/operational_aws_api/test_cases.py @@ -0,0 +1,196 @@ +import pytest +from datetime import datetime +from listofnwmfilenames import ( + selectvar, + selectgeo, + selectrun, + makename, + run_type, + fhprefix, + varsuffix, + run_typesuffix, + select_forecast_cycle, + select_lead_time, + selecturlbase, + create_file_list, +) + + +def test_selectvar(): + assert selectvar({1: "channel_rt"}, 1) == "channel_rt" + + +def test_selectgeo(): + assert selectgeo({1: "conus"}, 1) == "conus" + + +def test_selectrun(): + assert selectrun({1: "short_range"}, 1) == "short_range" + + +def test_makename(): + assert makename( + datetime(2022, 1, 1, 0, 0, 0, 0), + "short_range", + "channel_rt", + 0, + 1, + "conus", + "forcing", + fhprefix="f", + runsuffix="_test", + varsuffix="_test", + run_typesuffix="_test", + urlbase_prefix="https://example.com/", + ) == "https://example.com/nwm.20220101/forcing_test/nwm.t00z.short_range_test.channel_rt_test.f001.conus.nc" + +@pytest.mark.parametrize("runinput, varinput, geoinput, expected_output", [ + (5, 5, 2, "forcing_analysis_assim_hawaii"), + (5, 5, 3, "forcing_analysis_assim_puertorico"), + (2, 5, 7, "forcing_medium_range"), + (1, 5, 7, "forcing_short_range"), + (1, 3, 3, "short_range_puertorico"), + (1, 5, 2, "forcing_short_range_hawaii"), + (1, 5, 3, "forcing_short_range_puertorico"), + (5, 5, 7, "forcing_analysis_assim"), + (6, 5, 7, "forcing_analysis_assim_extend"), + (5, 3, 3, "analysis_assim_puertorico"), + (10, 3, 3, "analysis_assim_puertorico_no_da"), + (1, 3, 3, "short_range_puertorico"), + (11, 3, 3, "short_range_puertorico_no_da"), + (2, 2, 2, "default_value") # Add a test case for default value +]) +def test_run_type(runinput, varinput, geoinput, expected_output): + assert run_type(runinput, varinput, geoinput, "default_value") == expected_output + + +def test_fhprefix(): + assert fhprefix(5) == "tm" + assert fhprefix(1) == "f" + assert fhprefix(10) == "tm" + + +def test_varsuffix(): + assert varsuffix(1) == "_1" + assert varsuffix(7) == "_7" + assert varsuffix(8) == "" + + +def test_run_typesuffix(): + assert run_typesuffix(1) == "_mem1" + assert run_typesuffix(7) == "_mem7" + assert run_typesuffix(8) == "" + + +def test_select_forecast_cycle(): + assert select_forecast_cycle(12, 0) == 12 + assert select_forecast_cycle(None, 0) == 0 + + +def test_select_lead_time(): + assert select_lead_time(240, 0) == 240 + assert select_lead_time(None, 0) == 0 + + +def test_selecturlbase(): + assert selecturlbase({1: "https://example.com/"}, 1) == "https://example.com/" + assert selecturlbase({1: "https://example.com/"}, 2, "default") == "default" + +fcst_cycle_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +lead_time_values = [1, 2, 240] +valid_base_urls = [ + "", + "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + "https://storage.googleapis.com/national-water-model/", + "https://storage.cloud.google.com/national-water-model/", + "gs://national-water-model/", + "gcs://national-water-model/", + "https://noaa-nwm-pds.s3.amazonaws.com/", + "https://ciroh-nwm-zarr-copy.s3.amazonaws.com/national-water-model/", +] + +valid_folder_names = [ + "analysis_assim", + "analysis_assim_alaska", + "analysis_assim_alaska_no_da", + "analysis_assim_coastal_atlgulf", + "analysis_assim_coastal_hawaii", + "analysis_assim_coastal_pacific", + "analysis_assim_coastal_puertorico", + "analysis_assim_extend", + "analysis_assim_extend_alaska", + "analysis_assim_extend_alaska_no_da", + "analysis_assim_extend_coastal_atlgulf", + "analysis_assim_extend_coastal_pacific", + "analysis_assim_extend_no_da", + "analysis_assim_hawaii", + "analysis_assim_hawaii_no_da", + "analysis_assim_long", + "analysis_assim_long_no_da", + "analysis_assim_no_da", + "analysis_assim_puertorico", + "analysis_assim_puertorico_no_da", + "forcing_analysis_assim", + "forcing_analysis_assim_alaska", + "forcing_analysis_assim_extend", + "forcing_analysis_assim_extend_alaska", + "forcing_analysis_assim_hawaii", + "forcing_analysis_assim_puertorico", + "forcing_medium_range", + "forcing_medium_range_alaska", + "forcing_medium_range_blend", + "forcing_medium_range_blend_alaska", + "forcing_short_range", + "forcing_short_range_alaska", + "forcing_short_range_hawaii", + "forcing_short_range_puertorico", + "long_range_mem1", + "long_range_mem2", + "long_range_mem3", + "long_range_mem4", + "medium_range_alaska_mem1", + "medium_range_alaska_mem2", + "medium_range_alaska_mem3", + "medium_range_alaska_mem4", + "medium_range_alaska_mem5", + "medium_range_alaska_mem6", + "medium_range_alaska_no_da", + "medium_range_blend", + "medium_range_blend_alaska", + "medium_range_blend_coastal_atlgulf", + "medium_range_blend_coastal_pacific", + "medium_range_coastal_atlgulf_mem1", + "short_range" +] +import requests + +def is_valid_url(url): + try: + response = requests.head(url) + return response.status_code < 400 + except requests.ConnectionError: + return False + +@pytest.mark.parametrize("runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output", [ + (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]), + (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]), + (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]), + +]) +def test_create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output): + file_list = create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time) + assert isinstance(file_list, list) + assert all(isinstance(file_name, str) for file_name in file_list) + for url in file_list: + # assert is_valid_url(url), f"Invalid URL: {url}" + assert any(substring in url for substring in valid_folder_names), f"No valid folder name found in URL: {url}" + + + # Check if all base URLs exist in the predefined list + for url in file_list: + assert any(url.startswith(base_url) for base_url in valid_base_urls), f"Invalid base URL in generated URL: {url}" + + + +if __name__ == "__main__": + pytest.main() \ No newline at end of file From c51b2952844065830244fde0e7082259c38b9609 Mon Sep 17 00:00:00 2001 From: Rohan S Date: Thu, 19 Oct 2023 08:21:16 +0000 Subject: [PATCH 15/17] Corrected the bug in the url and added few specific test-cases --- .../listofnwmfilenames-checkpoint.py | 441 ++++++++++++++++ .../test_cases-checkpoint.py | 207 ++++++++ .../filename_helpers.cpython-310.pyc | Bin 1422 -> 0 bytes .../listofnwmfilenames.cpython-310.pyc | Bin 7837 -> 0 bytes .../test_cases.cpython-310-pytest-7.2.0.pyc | Bin 12175 -> 0 bytes .../operational_aws/listofnwmfilenames.py | 2 +- nwm_filenames/operational_aws/test_cases.py | 15 +- .../listofnwmfilenames-checkpoint.py | 493 ++++++++++++++++++ .../test_cases-checkpoint.py | 207 ++++++++ .../listofnwmfilenames.cpython-310.pyc | Bin 9496 -> 0 bytes .../test_cases.cpython-310-pytest-7.2.0.pyc | Bin 12179 -> 0 bytes .../operational_aws_api/listofnwmfilenames.py | 2 +- .../operational_aws_api/test_cases.py | 15 +- 13 files changed, 1376 insertions(+), 6 deletions(-) create mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py create mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py delete mode 100644 nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc delete mode 100644 nwm_filenames/operational_aws/__pycache__/listofnwmfilenames.cpython-310.pyc delete mode 100644 nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc create mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py create mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/test_cases-checkpoint.py delete mode 100644 nwm_filenames/operational_aws_api/__pycache__/listofnwmfilenames.cpython-310.pyc delete mode 100644 nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py new file mode 100644 index 0000000..90829e7 --- /dev/null +++ b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py @@ -0,0 +1,441 @@ +from gevent import monkey +monkey.patch_all() +from dateutil import rrule +from datetime import datetime, timezone +from itertools import product +from filename_helpers import check_valid_urls +import time + +rundict = { + 1: "short_range", + 2: "medium_range", + 3: "medium_range_no_da", + 4: "long_range", + 5: "analysis_assim", + 6: "analysis_assim_extend", + 7: "analysis_assim_extend_no_da", + 8: "analysis_assim_long", + 9: "analysis_assim_long_no_da", + 10: "analysis_assim_no_da", + 11: "short_range_no_da", +} +memdict = { + 1: "mem_1", + 2: "mem_2", + 3: "mem_3", + 4: "mem_4", + 5: "mem_5", + 6: "mem_6", + 7: "mem_7", +} +vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} +geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} + + +def selectvar(vardict, varinput): + return vardict[varinput] + + +def selectgeo(geodict, geoinput): + return geodict[geoinput] + + +def selectrun(rundict, runinput): + return rundict[runinput] + + +def makename( + date, + run_name, + var_name, + fcst_cycle, + fcst_hour, + geography, + run_type, + fhprefix="", + runsuffix="", + varsuffix="", + run_typesuffix="", + urlbase_prefix="", +): + """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" + + datetxt = f"nwm.{date.strftime('%Y%m%d')}" + foldertxt = f"{run_type}{run_typesuffix}" + filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" + return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" + + +# setting run_type +def run_type(runinput, varinput, geoinput, default=""): + """This function takes the numeric command line input and converts to the text used in the url.""" + + if varinput == 5: # if forcing + if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii + return "forcing_analysis_assim_hawaii" + elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico + return "forcing_analysis_assim_puertorico" + elif runinput == 1 and geoinput == 2: # if short range and hawaii + return "forcing_short_range_hawaii" + elif runinput == 1 and geoinput == 3: # if short range and puerto rico + return "forcing_short_range_puertorico" + elif runinput == 5: # if analysis assim + return "forcing_analysis_assim" + elif runinput == 6: # if analysis_assim_extend + return "forcing_analysis_assim_extend" + elif runinput == 2: # if medium_range + return "forcing_medium_range" + elif runinput == 1: # if short range + return "forcing_short_range" + + elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico + return "analysis_assim_puertorico" + + elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico + return "analysis_assim_puertorico_no_da" + + elif runinput == 1 and geoinput == 3: # if short_range and puerto rico + return "short_range_puertorico" + + elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico + return "short_range_puertorico_no_da" + + else: + return default + + +def fhprefix(runinput): + if 4 <= runinput <= 10: + return "tm" + return "f" + + +def varsuffix(meminput): + if meminput in range(1, 8): + return f"_{meminput}" + else: + return "" + + +def run_typesuffix(meminput): + if meminput in range(1, 8): + return f"_mem{meminput}" + else: + return "" + + +def select_forecast_cycle(fcst_cycle=None, default=None): + if fcst_cycle: + return fcst_cycle + else: + return default + + +def select_lead_time(lead_time=None, default=None): + if lead_time: + return lead_time + else: + return default + + +urlbasedict = { + 0: "", + 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", + 3: "https://storage.googleapis.com/national-water-model/", + 4: "https://storage.cloud.google.com/national-water-model/", + 5: "gs://national-water-model/", + 6: "gcs://national-water-model/", + 7: "https://noaa-nwm-pds.s3.amazonaws.com/", +} + + +def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): + if urlbaseinput in urlbasedict: + return urlbasedict[urlbaseinput] + else: + return defaulturlbase + + +def create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date=None, + end_date=None, + fcst_cycle=None, + urlbaseinput=None, + lead_time=None, # TODO: change this order; placed here to avoid breaking change +): + """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" + + runsuff = "" + + try: + geography = selectgeo(geodict, geoinput) + except: + geography = "geography_error" + try: + run_name = selectrun(rundict, runinput) + except: + run_name = "run_error" + try: + var_name = selectvar(vardict, varinput) + except: + var_name = "variable_error" + try: + urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) + except: + urlbase_prefix = "urlbase_error" + + try: + _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") + _until = datetime.strptime(end_date, "%Y%m%d%H%M") + except: + today = datetime.now(timezone.utc) + _dtstart = today + _until = today + + dates = rrule.rrule( + rrule.DAILY, + dtstart=_dtstart, + until=_until, + ) + run_t = run_type(runinput, varinput, geoinput, run_name) + fhp = fhprefix(runinput) + vsuff = varsuffix(meminput) + rtsuff = run_typesuffix(meminput) + + if runinput == 1: # if short_range + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + elif geoinput == 3: # puertorico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [6]), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif geoinput == 3: # if puerto rico + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 48)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(1, 19)), + ) + elif runinput == 2: # if medium_range + if varinput == 5: # if forcing + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 19, 6)), + select_lead_time(lead_time, range(1, 241)), + ) + else: + default_fc = range(0, 19, 6) + if meminput == 1: + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 241)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 241, 3)), + ) + else: + raise ValueError("varinput") + elif meminput in range(2, 8): + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(1, 205)), + ) + elif varinput in {2, 4}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(3, 205, 3)), + ) + else: + raise ValueError("varinput") + else: + raise ValueError("meminput") + elif runinput == 3: # if medium_range_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 13, 6)), + select_lead_time(lead_time, range(3, 240, 3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 4: # if long_range + default_fc = range(0, 19, 6) + if varinput in {1, 3}: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(6, 721, 6)), + ) + elif varinput == 2: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, default_fc), + select_lead_time(lead_time, range(24, 721, 24)), + ) + else: + raise ValueError("varinput") + elif runinput == 5: # if analysis_assim (simplest form) + if varinput == 5: # if forcing + if geoinput == 2: # hawaii + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(19)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(20)), + select_lead_time(lead_time, range(3)), + ) + else: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(24)), + select_lead_time(lead_time, range(3)), + ) + elif runinput == 6: # if analysis_assim_extend + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + elif runinput == 7: # if analysis_assim_extend_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, [16]), + select_lead_time(lead_time, range(28)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + elif runinput == 8: # if analysis_assim_long + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + elif runinput == 9: # if analysis_assim_long_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(0, 24, 6)), + select_lead_time(lead_time, range(12)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 10: # if analysis_assim_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(21)), + select_lead_time(lead_time, range(3)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + + elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da + if varinput == 1: + prod = product( + dates, + select_forecast_cycle(fcst_cycle, range(6, 19, 12)), + select_lead_time(lead_time, range(1, 49)), + ) + else: + raise ValueError("only valid variable for a _no_da type run is channel_rt") + else: + raise ValueError("run error") + + r = [] + for _dt, _fc, _fh in prod: + r.append( + makename( + _dt, + run_name, + var_name, + _fc, + _fh, + geography, + run_t, + fhp, + runsuff, + vsuff, + rtsuff, + urlbase_prefix, + ) + ) + return r + + +def main(): + + + start_date = "201809170000" + end_date = "201809172300" + fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + lead_time = [1, 2, 240] + # fcst_cycle = None # Retrieves a full day for each day within the range given. + runinput = 1 + varinput = 1 + geoinput = 1 + meminput = 0 + urlbaseinput = 3 + + file_list = create_file_list( + runinput, + varinput, + geoinput, + meminput, + start_date, + end_date, + fcst_cycle, + urlbaseinput, + ) + if len(file_list) == 0: + print(f"No files found") + else: + print(f"Files: {file_list}\nTotal files: {len(file_list)}") + valid_file_list = check_valid_urls(file_list) + print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") + + with open("filenamelist.txt", "w") as file: + for item in valid_file_list: + file.write(f"{item}\n") + + +if __name__ == "__main__": + start = time.time() + main() + print(time.time() - start) diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py new file mode 100644 index 0000000..83242bf --- /dev/null +++ b/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py @@ -0,0 +1,207 @@ +import pytest +from datetime import datetime +from listofnwmfilenames import ( + selectvar, + selectgeo, + selectrun, + makename, + run_type, + fhprefix, + varsuffix, + run_typesuffix, + select_forecast_cycle, + select_lead_time, + selecturlbase, + create_file_list, +) + + +def test_selectvar(): + assert selectvar({1: "channel_rt"}, 1) == "channel_rt" + + +def test_selectgeo(): + assert selectgeo({1: "conus"}, 1) == "conus" + + +def test_selectrun(): + assert selectrun({1: "short_range"}, 1) == "short_range" + + +def test_makename(): + assert makename( + datetime(2022, 1, 1, 0, 0, 0, 0), + "short_range", + "channel_rt", + 0, + 1, + "conus", + "forcing", + fhprefix="f", + runsuffix="_test", + varsuffix="_test", + run_typesuffix="_test", + urlbase_prefix="https://example.com/", + ) == "https://example.com/nwm.20220101/forcing_test/nwm.t00z.short_range_test.channel_rt_test.f001.conus.nc" + +@pytest.mark.parametrize("runinput, varinput, geoinput, expected_output", [ + (5, 5, 2, "forcing_analysis_assim_hawaii"), + (5, 5, 3, "forcing_analysis_assim_puertorico"), + (2, 5, 7, "forcing_medium_range"), + (1, 5, 7, "forcing_short_range"), + (1, 3, 3, "short_range_puertorico"), + (1, 5, 2, "forcing_short_range_hawaii"), + (1, 5, 3, "forcing_short_range_puertorico"), + (5, 5, 7, "forcing_analysis_assim"), + (6, 5, 7, "forcing_analysis_assim_extend"), + (5, 3, 3, "analysis_assim_puertorico"), + (10, 3, 3, "analysis_assim_puertorico_no_da"), + (1, 3, 3, "short_range_puertorico"), + (11, 3, 3, "short_range_puertorico_no_da"), + (2, 2, 2, "default_value") # Add a test case for default value +]) +def test_run_type(runinput, varinput, geoinput, expected_output): + assert run_type(runinput, varinput, geoinput, "default_value") == expected_output + + +def test_fhprefix(): + assert fhprefix(5) == "tm" + assert fhprefix(1) == "f" + assert fhprefix(10) == "tm" + + +def test_varsuffix(): + assert varsuffix(1) == "_1" + assert varsuffix(7) == "_7" + assert varsuffix(8) == "" + + +def test_run_typesuffix(): + assert run_typesuffix(1) == "_mem1" + assert run_typesuffix(7) == "_mem7" + assert run_typesuffix(8) == "" + + +def test_select_forecast_cycle(): + assert select_forecast_cycle(12, 0) == 12 + assert select_forecast_cycle(None, 0) == 0 + + +def test_select_lead_time(): + assert select_lead_time(240, 0) == 240 + assert select_lead_time(None, 0) == 0 + + +def test_selecturlbase(): + assert selecturlbase({1: "https://example.com/"}, 1) == "https://example.com/" + assert selecturlbase({1: "https://example.com/"}, 2, "default") == "default" + +fcst_cycle_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +lead_time_values = [1, 2, 240] +valid_base_urls = [ + "", + "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", + "https://storage.googleapis.com/national-water-model/", + "https://storage.cloud.google.com/national-water-model/", + "gs://national-water-model/", + "gcs://national-water-model/", + "https://noaa-nwm-pds.s3.amazonaws.com/", + "https://ciroh-nwm-zarr-copy.s3.amazonaws.com/national-water-model/", +] + +valid_folder_names = [ + "analysis_assim", + "analysis_assim_alaska", + "analysis_assim_alaska_no_da", + "analysis_assim_coastal_atlgulf", + "analysis_assim_coastal_hawaii", + "analysis_assim_coastal_pacific", + "analysis_assim_coastal_puertorico", + "analysis_assim_extend", + "analysis_assim_extend_alaska", + "analysis_assim_extend_alaska_no_da", + "analysis_assim_extend_coastal_atlgulf", + "analysis_assim_extend_coastal_pacific", + "analysis_assim_extend_no_da", + "analysis_assim_hawaii", + "analysis_assim_hawaii_no_da", + "analysis_assim_long", + "analysis_assim_long_no_da", + "analysis_assim_no_da", + "analysis_assim_puertorico", + "analysis_assim_puertorico_no_da", + "forcing_analysis_assim", + "forcing_analysis_assim_alaska", + "forcing_analysis_assim_extend", + "forcing_analysis_assim_extend_alaska", + "forcing_analysis_assim_hawaii", + "forcing_analysis_assim_puertorico", + "forcing_medium_range", + "forcing_medium_range_alaska", + "forcing_medium_range_blend", + "forcing_medium_range_blend_alaska", + "forcing_short_range", + "forcing_short_range_alaska", + "forcing_short_range_hawaii", + "forcing_short_range_puertorico", + "long_range_mem1", + "long_range_mem2", + "long_range_mem3", + "long_range_mem4", + "medium_range_alaska_mem1", + "medium_range_alaska_mem2", + "medium_range_alaska_mem3", + "medium_range_alaska_mem4", + "medium_range_alaska_mem5", + "medium_range_alaska_mem6", + "medium_range_alaska_no_da", + "medium_range_blend", + "medium_range_blend_alaska", + "medium_range_blend_coastal_atlgulf", + "medium_range_blend_coastal_pacific", + "medium_range_coastal_atlgulf_mem1", + "short_range", + "medium_range", + "long_range_mem7", + "medium_range_no_da_mem6" +] +import requests + +def is_valid_url(url): + try: + response = requests.head(url) + return response.status_code < 400 + except requests.ConnectionError: + return False + + +@pytest.mark.parametrize("runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output", [ + (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]), + (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]), + (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]), + (1, 1, 5, 4, "201809170200", "201809171400", fcst_cycle_values, 4, lead_time_values, ["expected_file_name_4"]), + (2, 2, 4, 5, "201809170800", "201809172000", fcst_cycle_values, 5, lead_time_values, ["expected_file_name_5"]), + (3, 1, 5, 6, "201809171000", "201809172200", fcst_cycle_values, 6, lead_time_values, ["expected_file_name_6"]), + (4, 2, 5, 7, "201809171200", "201809172400", fcst_cycle_values, 7, lead_time_values, ["expected_file_name_7"]), + (5, 5, 1, 8, "201809171400", "201809172600", fcst_cycle_values, 8, lead_time_values, ["expected_file_name_8"]), + (6, 1, 16, 9, "201809171600", "201809172800", fcst_cycle_values, 9, lead_time_values, ["expected_file_name_9"]), + (8, 5, 3, 12, "201809172200", "201809173400", fcst_cycle_values, 12, lead_time_values, ["expected_file_name_12"]), + (11, 1, 3, 18, "201809173400", "201809174600", fcst_cycle_values, 18, lead_time_values, ["expected_file_name_18"]), +]) +def test_create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output): + file_list = create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time) + assert isinstance(file_list, list) + assert all(isinstance(file_name, str) for file_name in file_list) + for url in file_list: + # assert is_valid_url(url), f"Invalid URL: {url}" + assert any(substring in url for substring in valid_folder_names), f"No valid folder name found in URL: {url}" + + + # Check if all base URLs exist in the predefined list + for url in file_list: + assert any(url.startswith(base_url) for base_url in valid_base_urls), f"Invalid base URL in generated URL: {url}" + + + +if __name__ == "__main__": + pytest.main() \ No newline at end of file diff --git a/nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc b/nwm_filenames/operational_aws/__pycache__/filename_helpers.cpython-310.pyc deleted file mode 100644 index cdc10240cf48e22b9ee0f906911346cd4c359433..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1422 zcmah|Pm3c(6t7p^>F#8*vr!S(gOG!V2|}XaL6&h4)IBY8Fo$Iuq-nZpCY|knQ&pMl zgwAOO)RTA?AvrF75kEs6^)z2VWEJ$i?nIUy&}!c6SFhfy_osfZymzn1aQ*S~2lA5+ zW53bn)#c#kGbDY2OfUf%3)?@0fP0?t+zA~Zos+q_8@eb5!WEqx2s^?P{tXK~0aG^W z-r^nXbOhF~Vr9}eL(w%CB2PG-?3RmvA_}b^Av4T?;WfJk`qtby#;y4^|BiLpH|z`q z2!0dP>_LsHN6vSv>3wcWb8@emYe0Vj;EB z<7sPiG)e$d*e}Y{LwXBIKR|ZQYW8)o2E#V+EvsS6f8ZB<%3iP$x8AwDl!dV!UB$}+ z6@2`m?VhEXjEXpyFCaWjW-@seEmRg!OBB`payprnxtz?)%d5DUh}gtYoFte}B%jB5 zl}SA*mU%??&^3BeR#L?VD~huyUh2vIB$~+#kMy{@vc2Oh)g~$P>g14;r%E2M)%$G% zmvNQ~N<)v2voeV@eS*T?;Z6%E%_-e7MAo#RzlIt%Y>PNpfa0yZYX#7gJLl4jxbo4# zx|Cp3Ys(0N>HygQsmW}}M!q5d*p7;eb7>u{RZ-5i`)|t>0+v~SUZz-hHgb2ZrL?WT zdr}L=-fydG`c8*!&@MVFifjk3_kfbrePjct2OQqyJvapQ4l3>dH9+O(Wd18Ms#?Gn z;rK3F^VMSm8|=guzCT_&wKI2w^Ng!;%{Hz;0RPB-;sjINcSL6=d0U1Wc)~b#(#7u@ zXX^F^QzWvO;;jbe{yey<+LQmk`)T91#8)|e36tBu zaozrc6|ek9<)RP|hqp8(tAkUVuAOGoCERDJvB7?*tUHsjum{@2W}%~`6f(T8rHO>p ziApPiu=N%dLSBZxl4nZlneEBviEPevIQXia&oB3?k*nUu$civ+gFWymq^`|E0)F5g-ti1UmH|GKAZrNK9E~xXPuXym!yBALiQ=6@G`z&PnIMmG1-;6vL(xs zBR^6eOET3dZmz3Zs(^EDzMHr5UCq)&S#=9t-O>e|cZ*%aG6bx-C9A{=OlL)Auo5e? z3ahdOR%457>0!w#vt_ozR@oXm#p-OGoqkxbD(nn9%g(VU*m-6?%v)7A?P?0I$*6m>wi*b9Kx*|Y3L_G|2`?AO^#>^IoU>}wBo>ojEjCi^<@XNp?- zvRBz{_6q9GvODY>kax~`f_?LW!vD@GJt{qxtn<#f5$4V@ld^NZE{!m!OuH+-#%p6* zgN7VyUEh1)90JO7-gljb66?$kov_n&Vx7`~?>RshdfaFIb_lT6-gDXyTKl%!VXZ!Q zgYCE!?D;%waogKd|t` zc{g-C7N410oyd!qk0D8XygsLLLcekhmGo*cGl+z+Q5s`cN@InR93?6xc}g@&3Y6%S z6e*#tJ}x1_26eivt4Ve($*w2ajU;<6$(~QLn~efr03WYWvPj7iCCikoP_l|7Hrji( z=Q(bRhnDKv9xcY2 z$VBqX?>6`RuG4(m-#@gyCbpX0vfFJZ2(Zw0x92)R(>v(4b~>)(*fxupe-NE z?@J#j!*Zm(FGYrcD=Zh~nEJlT@)59+{yy|TqyQ=kr~pW11>m(Pj~e}h>aZG>P+LZ+ z7*#}Cg)C^0hB5Hp*y{x5PTy*FtN|}v9$uT6Pg__ z^v!V3G3{Q@?X>Otu46)ln5a#*L5k#lk9VMmOj_oxfrboHt@?D1c09c^G0hpbSWSi+-`+?a0OMSqnG{6@pbQf7+TDLi&+hqEb_Zlk-*Z|Kh zRuVADe~Fc+e79KzhGu2T^ZG>2>#Qp37C>)J>MT~9^07%BV~e7033T41&ST4{!x$Q? zgYO;Pej#Qn5F5wqcO7UjY`Jciww~MZ9PBx$prbo+vI)f&?bBxMc!D2mwv{MdQpEAO zi5hJkvlofV6nZ+oa01OtoeE_dpHAtrRwfY?KQRwC6A-VZ_@n(0mf^%sP7LQt%A2*r z@p4+5>CG1`=Bx+2PSbJzX!ETZeqvYGGSV_wzE0d{=OYu=ZE*w|DsO<4U!deEVk$6a z$L_nwPYAyO1BEjC)SQsfCi|W~1e28)8r1|q8dXKr z1DTg_h~(rD!H-h0z40~j+lieP)1!plSl&qnA}0fx+C6cu`E!u@0G~M0OX#EWxvq%X zFZ6?sHTC0@vq&N-L|>RPQuZZYhAx*#tIzr%T!%Ff#&V03(fE-3IdaAT=x{~m*%^Kl zgzw<{gj|`bY{;r2>Q3wn|27C|%+vNPwFT*-N&YI3e4CPEqrQc@cd7n$N~rV&b4fEV zMoR`RlDHbdpNQnUN9;4dM*QEP+-$D7rmicd! z+^jcHbRCjSO7+Ji;4UTKMKZXx7lysy#b(px z_3sPkr|GrZJ58u7@_L%&V>btP{y&_45MG|z-sW4c-E9tTq;lXeaeLQ6!~QP1+v@~d zqQ!|@b@>4Hkzelm%yFB8=Z|x>UBAyJykEk1dRGi~uFNyL?URvDr47>LT*fq9?qRxu z>sxl$#^nP(pJ=tAbJ``xJkxn@a+}U@`Vc%dkEGNinQN)X8sQ9O);t%NCPrI0Xyr*k z93ET-6HPR!JiY}h$#$4ve}aYrI#xmrDg1WeOC4o~zICj95GdOiv7C0>|Lor!u3@O88sQt_j+g=g=-7__08(i+Pp3pMAjpz!@ek4#k)IYVOSH0R^DiMM8Smp%j+l+|k@;Fc zuqG?-7UZ0r$T=tE{Ogf9OlR&NQkj{Vt0Ckg)?zXnOXxwec0Zp(y#(r|bnd+WPRD`u z#-4sp;g`UjKugK~MxC6>L7$d0dvSqaYSsJa48{4m6w{tVMt0xLWc(C;sEIz@N~JL~ zb3QIiTYC)3i-Pja$!x>rwdt3oDCJ%3XmFh&8i-KF@E0okJA&&9C zBYBC8b|Qu7lA;hL0%I2lOCIm|9|`&`f<8tvxaoWD;bbb({)-v1&E%$G(jA><+C&6n z7Ecq7MCT*4Iu>1`rp9EYMdXd2B1rU*0&(I(GoYz5#Z)y;izpD^ARL8i_%oDTqQs_2~#67uUsbfQNK73VzvfKxm%&h^7KCvU~d-~QUSUcD!-C*p1(oO0neb8;z!AIo1O zh#bw>c++P>7#hVf z_;sqH<>6wN( z1wg|U02eO2+Go|gj_V;_g;J`kL4y97o+sE4u%ap8o7E;1vsxo>WH`&30Y2i^1&<*l z>5vPlTJ1}4x{`9%Ce#MnO=S`POqK>&M4e#Epwm7}dUO;@I7p5L?u0ri5q~n?EUqRf zfkR&in@HBRl}VY#PHpRoiWyI4k?7{UztBHdYy2IoQ5)a%p`%a!ukd$c0PuIovxOFA zxxbKy3UU>Bj^&34QV#R@G^Py;tT5C=1Sg|>q(y~^kExIJVG*8kaU?T}U2TR6)9D9? zcNS1l1T~EWkBhQO82HYh9vZ-xC@9Lxz#Ah9l_q$Veh`h6AFmC8hgLTbKrKDWivZ}t z2MDUl(mSueh2K#s-cgQzGToJ~O2N(nF!bIdUmbOg+IC#NcIE1=D=%EVc?Hj`bnW_; zD~io(x_z2UX(@KeXKhiE;%hr^Cz{Iw?~$lgMiXkZq#l zx`p^=EFZ+O(a^-ch;u;3d11yl{XmIT#Q0Hjz&oMSsBnsqaF-Hs;U|SOPm^&{S8ghEf_f~D;`F#jPZV`mk6cG%u) z*{*Aee{g-Wqd8!N(MYUB5re^B8s80sEwH9j-P&_p{3!`0Gb+LsWQ42&0yK1;O$?E+ zP?t!i_<fW$izdYPMecuUZk*IbGFNrJQ1nUl^zK4XCDqfmhMf a`XtvrHOd*<|CGM32l|sbjKPYme)d13(~Hdj diff --git a/nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc b/nwm_filenames/operational_aws/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc deleted file mode 100644 index 7e260453176c33de2ba75b7f26f76a97ed49658a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12175 zcmeHNYm8jURlc{Mx1aMKkKg0xy4Rjp_lzIQ+U%}38^R_I?6MouWYv4!b!VpSe!1># zd#AZAh4qSTkU%SuACWMU9f6PqN<<`r2%w0kKuG*SNa+3y64Lxb@;~zk-#K;fo$j9L z^^heINOY@CovJ!jb*gUFIj2rlE0;@a_$7buyft@7)Bc$f{l76JUP7?%>Y64rq1Uyt z&T*s>jcB^oh?V2IDoK8 zHO9;1jfwI^W3oKim?}>-rpwc)7ZdRhwDOEdh$P}!VTcss10pRli07=tt%DB_iR^|Z za^KJ&MMPeVeW2af%ZJ6dm;l8QF)5}H9~IML2Jta5D-Iw&F6P8R#Pi~iIE?rNS~&6z zy?he!QN*VZA47Z^@o~gwc${a;XT`iY0s80so==KX{MW>3amG3+&I(;Tx1q16#X0;7 zC_62lNB$}6lsM1%1>~Q_|AJURegXN5l)n)L_e)}#xj$zW#LDf6{39#z@cBoWS)sql zp-|kWFLI-xdxkI_%c(W2Le@*!R^6&PcTE|f8&)eoWvA&G4fD3uG#i#@AX#y?+Q8N~ z+tOOEz2~J+(eA7hnh%Qo^o(k`vfh$b)wG>Tb*oypym3FfZkeJ&On5n!(~^_uNeiSOW-1Y#guM|kob0L)sXruPu{bVqYFSHBrW$BuRhkhSI(SkZ(S_d>0k7b`K$AUEf?%_6c-Ej1DJ?IH;>yiVt?tyCwwI{4Kw*1x6??PQsS75pY}8vf&3c6wOsZm=>lU@( z|-ERm--)i`i(`EqkeXuffZYMqw|t+LmMn zF)s6-y~Hzw6>Zsz?JY0Q$#54ow5JTi9C#ooI3*nl&5?W zKs8N_iE$_PFwR2g9nd5kh#7ek7{mw*q48hW#e|p)soG3MqN>AGN>ojPYKqb%u7FQ7 zjuwE=Fb=T=e3o&DF5m|kk1;;Sc%1Qrj6;Bd<`Cma#t$=YFn)yb6yrx3Pcwdu@eJd~ z8P764&v=gU6O88xXKV}tl{g>w;|V{W^y4W%o`%3Z`5^JzBxYwxJ-V|jat}BL>ZjZT zBSdbL#4bV{A%T!YFc4A*X@m?y79od_M;JpGN0>mEM3_RD=}oiPK69h?FHk|eX(b78 zSt2k&U_j4Ns>x9j9%-u(agMmqp0ar9xbi8uL8zB`r!9a1?a3# z`L042%USw@3HpLiwkwj<0AVEpYXmMySjS45v*s<8N&l`fyu|Pd!Qu)-E-i9tkxPqQ ziZLaqyu{@tE-!I;iOW~H_UdZ4U)it1o}fyI)65w2DfcOv3v=Z@)m-e?xvTw&i?Tpt zW<$YOS}6^*QW|Jw zb)c2iQa_)oew)eySzS|Qe(KV&GS0E-5ZKqG%A;Dk^XD-Z~nHJy?+SExTru?O1D6HqCoxtyYLhB4w~01?HU?LErAcCT_`E zwS_vL3hU5W!%~Ai} zp*@=08P*Olw|Z?62ThhhwFifCU$&d_F?P!XxBql&d3TfsTv* z5wib~xkeOZpUD@9f-pK{ibUAPYfM2F;Ok7GEI>FgLW(ai1#CI_C8hv|Ht5KiaAC=D z3@c6;VU0G|fW-%Uk*I*72Am-elCYza~sM8oW0;LYzDfk5z#Kp=Qy!vZ0x^zBa& zPkfMhk+7=-t`T5W+kbkZPWG+wQu2#LUMBDh1inPzB?2!KAVnw76SzQN=$xfQdtgt{ zLC~h9_n#B%nm5AgQ2XYh6> z}f$jOf zV+-Vbk5{C!qMSz+tOa%UeU~tk0E=!mRU(y3+k8-_g-aTHzYAC1B;uh}shoA;(T{qN zg1{|+kqc5T1d2s@4n9vE z@Gu&{28Kaug{JR1!jpG>)j~e8K3BOu;41%|FDOU3%Fl*^^89`TC9mX9;3`)hygqIR z9s9ra3~qZNx9|TrxK)n*EGAnyhj#qAY{l;Kj&klk&)|Fda}V`-pNDQ*-l9)VM#nex zZ|kxRWaP?_O_b7=#J?gQ=A1{fi*wHXB%IIma~@s>vAK1JxUh#Arfo(7fR?h|<)!?8 z?&c@Bt4hJHgY6U?Htv&SI^9dc6`h2gXSjy_Eq`Gfu=6sMKV;{95H9Sm?t5Ve2L!lt zMfO30ZmUEN?&t*DeJS7WgHv~x-N!dqaDy5+FpwQHri8e>jBlZ&IA2=iC^@)P_&Dt) z{Zm*$?_MW+=W<~00NHKtDE}W!A%(zm)n@^b->{Gvwtfzx=Q12@m-(h-!1J|&V~6}n zltcV7gZo^y@RRLWK4kPLlmB_bcI^IFsN+80*p+^l&&uks0-Eew^?_3nE%4#s+xp+& zkcL!YPVGT9-1sQ_Y2Q67zSn{Gq4>7Hqun%X8W z^)`v5ZMvn}rcbs_Uu&C8$!#(v3sL3Q;7%oF7_7Ovmje%Uv(+#Kd5f*~VzXtMiyN)G zOYP20zDQeYR;%kvw`(>#!qwfMSzow^OJBLr zXbG#n)V(-dt6Fb$aNW&yKc?QH4RW;a-R9wq>gd9=L5DP`1+c%+#xU*DqS-LJttRgM zxYzD00bR8wTboSSHKkmrw%S{}$@gTWQ1bGFz7KC^FkLb0rhVJ=4)2+*TpixAp~7kl zm+W*!>C`tm^>y#)h;ro|8(F<=R%`3Es&`^U^&P*`F0PeV$U8DrQ)LHZI{9&>{?N|s zO*1^gXOIOW8JvTqN{S=vltz|} z?q_Xeor~VVy?9e=Y;vGTP3|B$=LO&7vpSv_>Dm`@&;F)%5BlRq?IX1J zS|J8+o^_{#`%k;T#n^uJV_; z4RT_?#%(Akc8saAi z+cV6|_y}J9J>cadUGfb4klg$u+y4_cx@&!|MwsgULxEl9{La-qTk&4HX5;agW5R&+ zQsEOnFGdgjaC^f}%^i+C$fX-bZC>8=$GIof5%{d25vv@N8W}sx)UKLAL};? z5&2!vcv0JtkT8+OWxr3_e8UnZ_2P!r#2%H`{t$KeeKLem?;X_B{%d=Yd)QuLdU?E)R*IRXA$$6R|;GL$JuQli2 zdgHS%%y$>l*Z!O1Yi{it4t+ZwPXD+2PB(FSbWh5XRX5}w%lTFDmttx zw33(J(Mn#nUn_a3P%EkHsXrzT$yKi&Whe*znmWhwO!1u2ba~4EGQ6z6i6Wa0i*w1Z z>vi4G-;WrvU)EzkkpDoHj7p_p)|!<{;k;)AFF|?Myn_|>c63pV9EyEWTK8nl!9EYO zyriGU^Xny474=YNl0-mj9Y-$`3$y~&b5`}}F^=xllNEAlsw)gW_mV=B>_=ml6rFq3 zS6w2kNMMz~bpS8cFy(D8gV(HR9^0d9@ogO6N3aiAHi))Oz$QS)ac^R0{?#?I?Tv>M z@8`(BBl5o`@Vf-a7zyW$b$x|;uQmI`PPleG<3*f%J9X9GRB$_F_LpS{bT=;P8efZe5?Ng5kOymV8z$uVl-%PJ_EJBlherC31%9 z8SZ+Cq0g6fkQ{>K9KZ>Zvy|*Qxd$gea8G&5OCFM^An=2OU;qON?uk!v;0xjyag?um zINaGa<#WQt^lx=_cU5(DRrNagd`80Wv%ma?^>0r}($A?9|K(752A}(mEJ;jaa!YE< zhAc~#yrpbQGPNl!$)H?qscp^BM4Q%1v=c_6oivi7o@k}ox}gu@(?(j1%NQA!WGSYz zG|R9o%dtEwup%q5i91;%%gSt$O|fY;OA>Ct>7Sg+0g) zvBP&%qri@^qi;(_ksV_XAuq9qSrz$2`jP05z0AJBj=wD%W%dd?!4^@PWHojYP*dy_ zJB@sronen6pJ7+oW9)H2%%b!JJB!jBdxSm7zR7-tJ;k18=h(N{Gk0`j9=M)m-$wsJ zT8aMHtL!;;9&P*B1@=7f?PuR%FCc$_T{I4`Uo{T07mW(L#9l(%gKUXiMt;aT%&y#0 z_}^LSt@O5J9I^Iqg2vX-ux=fhlQuznrd$o zBvl$Hl&F+wlq4ugQj(%XrzA}YiT@yr1j1^y>!-r(beNq9vq!`1u`qi)%$}&Gcmd;h zk&+T66O@!GnWSV2Nswu*nzn7V>fAH5mT5C1%`MmBYfh6J8PDR}Y})7yQp*l+H0_mY z;=X8uM8mPWu8~|d*Ue_r$aK0E_Z;4AI8}Aes0I-r`cav_=kR?>d-o+@o|mplAIM7| zNRXA(n#oxcQlX>Jv^!mo9{`w0KEGaDb=p?#rn9zT+BHa(SvMOE%XPtMv)yS~Zp~hA z*O!|u%QoAnIUS3eUeke;)XjBBNvBzBHC@kH2HXg2v9l4RU8`j^JWP6&mPTvTWqt_R zeb>+T)50F0gqBE)U$LBU@hI$BJbw@{(KP6;(zwHv97FPdnS^)ka1s>0Y7%sx7)?Uc z1yX~wX!Ij-A-N%5xT{E*>`PlX&)f2Q(p{yW^ONsMenyn@O!ZZ!y{EB+k6vGYPhv@5 zK`AXtDU>vpLVwavphdr1=okDfT63tS{k%vEz?HlzRWm*3)zzk3S?<~m;!XvU$-uoz zhg;AJ#8Hb?Je=MNIx3L0HHep6X?m5W?Ku^1)vB1CPOI54Z?vpRm$xct4Mid9?-bM$bmRGU8<>nhf z8kpSfvM55}hQ$KmC&;LdZQU?ks~(OrQbLQq;b9`D#Xt`fQ?xl0cFssBW_sJaC62%; z;w_!CQYj}tD$mLMNmLr7iJ~8sqa*453o5?MWT^h}i%3uiD+iW(8HteA~-TU5d@$*0}R|t;XZClV{5V>}nM6cDf zEyx_y&~8Z_3ZW3uE(vSX7URLQ#Zd9m=?Wegs?z$NJ0nz^LRSY5-Unt!r3!@_EJSee zn}MoIsWOW z$3N{}Nd<(m^J%KU(V#m%`cau~dZAG(D3C_g&~!`YSsWrcJVdaYG>A98YGNt8s|9%! zuN}zC;YQ?eBcr=doNN9laJKP@BVEQCm3z7(TEDaoI@Yv~PwAHOB@b(1%BHd=@f>uy zL|T1k4Z?hw24Eo9`6I|bAoGn3G8A-}BjfHFPNZ}3eM;s`LC(mUBHHe|7Jd66!=~`%tGn)JPTG(^Ps#2X zg6Z(~uS2M6T@(I^7{-=PV8996|jyA$Wz7 zSCRD2u6ka_{bsFZJ8hG>xS6cZqV1UG;)=6Y>vV4j1E^*<8p}1PE3$lQWK!39&;Nf$ zI<9wObobV-y>zwq`J)jY7);z;u`sc-g5`Fa?xL7+Xj+|E$Nk7pv>j%(YQ4wzj%u`= zE*p;hig61oVzXm$?ptZxzw=l$Ara>U$Z(=k=^ zN9I2acZ-r98=1u%rCoDOjaDVw9|}qci2f{0cTf%E&jZu+nuJm{!sO4!b|1&^Gr-OW z*k{JT77-39((Hn+vi-;2ESuewe?#_F%$#9!bk$&<{%FPZ-44I|3^eS|Yc zxe|gvf)_(-l(B-4cOQ>|E(5w8(a!$wXdmD=WcnS2AI9jgR1W1EZE}PMYnqJ7Vv)); ztNqV0h=XGxMk@!7xZLgF_-m}8B-U^?!p17G^YP$lwrxNj5|FPCsjcu=e#w~Micxq@ zjJ2Z?g3{$({K=2zx`nw83-}+8=K7tlHF7C7;}L=3uSOU~8US-29ZRoTZ1gcP`lIly zgC8G_@O&6s$(PUlNcY%!gbn#aBxfs9f6kvdFMWURduo5aztG?3&uz_l^ZvY_^>hA$ zzt7M3Wq;C7`&0h3pWlQIm8Ex-+aLRLFdZN6^Sd2wghL+su$~`YZeg{T37zqyFC2ZTs=&SVT+c*oD$%vQW-Gj= zMF>lTwdm>|mjSBhbMCl457&|dFVMsJT#H<4fw+(30frt=MmCsN@ zR{nq{6%I|0NL3M}sxF8q5I;hFA~N(4l@3vIkP^E8`RkOBT`!^&9b#0V+Ri%1^)Cf# z*K2UHR*b~?XD?p5F5DC08wjIZ*v*_w3Sr0cOH?95GswJdwz}2@;$DzMSOpPN5hbYZ z-{VXPDg_zACPZQ>CIVPd3Qg;4g!3vABZaL|9D|>xrWz&0BYu`T^?JnXWL+>jNO%r2 zH-dx^EF(c8gy;+cCPre7&=^UCe~_RP=OE0a)Xl9y@pk(!7Z#+ zL<{Cgj3aDM`U((;N@kdydRw%g@g|G5Pu4#)K&rp z{R=%o<%}pRNd@C}T0@AP)=VN38<|UHFpfs+VoXM0(t#IPlclfd)0JpsX$YOcyb&%M zzk@3YTm&8V<^Y%cB3#k#QaFO}Br!UK4r??%+;6$G>8TURtt zJftGQjrse9eorg$+u%_H-{?o0fb3sk@0P(l*t=xeLW?r>eYvk7*O03$(T6?SPawyG zP9IM?eceNF(ogtFKQ**5^-aB>hNYa|lvxVa;Bil3D9e)l7;I#e`Wbo#dK=--^JIr+ zJ#8!DXG#A;Yv=qdwDxR2@8@qOVbK;)EB1>l=ND%Y5QU5Vx}V!hg>@Qp`#ZOvf%Pr> zxlII5S^lm8-%joHnUg20b6?m05qfb)V+m!&4G^DS zM=~JieIPU`acDs5bRZPIK>RfT^m6gF_e#;*0zoYzKpV*Gft)c?SG(f9qM8vB9;kSz2omB#4oZkM-m1Hn z$s4P6v$f)Iig_5Rn|MhuTdg30>lv>sH_;eqc-X+eb>8%>YMI|c8*fo^iV~vIIMvNb zKL=@An^1(98T)sQglD?9TqBKH&~BrsQA;4V0*&f{MneJ(lePD1SB=Q>F42juP|}d3 z5h)M<6lL!j*FMW-GP16UfDvGm_()GGZz=y%)^eIIml4+5e=n!<-vi2BazGay$$`)U z=TX3EM2HP%XJ23O6+B+Z@H15=Z@~{?3R8bmnU}U@CeKQJ4LU$xlleg(55~SGP8V8m zZnPzS9JmHw+`D&gfbSjz2!hjUdVxmN7c|FfmlJ#VyCflx3>0KNlbA(i1!i_baT7PB9M?Q40B z)yl4Y&?%?A(AoKvEH`-(Z&p1Wt_#Gs8fxk=TKS2`YUcLO{g{AtHS1vq%@w7pG+Hi~j{`vlSu$ diff --git a/nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc b/nwm_filenames/operational_aws_api/__pycache__/test_cases.cpython-310-pytest-7.2.0.pyc deleted file mode 100644 index 272e3a1529e26bddd824bf856b7705c585772078..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12179 zcmeHNYm8jURlc{Mx1aMKkKg0xy4Rjp_lzIQ+U%}38^Q*M?6MouWYv4!b!VpSe!1># zd#AZAh4qSTkU%SuACWMU9f6PqN<<`r2%w0kKuG*SNa+3y64Lxb@;~zk-#K;fo$j9L z^^heINOY@CovJ!jb*gUFIj2rlE0;@a_~m}@yft@7)Bc$f{l76JUP7?%>Y64rq1Uyt z&T*s>jcB^oh?V2IDoK8 zHO9;1jfwI^W3oKim?}>-rpwc)7ZdRhwDOEdh$P}!VTcss10pRli07=tt%DB_iR^|Z za^KJ&MMPeVeW2af%ZJ6dm;l8QF)5}H9~IML2Jta5D-Iw&F6P8R#Pi~iIE?rNS~&6z zy?he!QN*VZA47Z^@o~gwc${a;XT`iY0s80so==KX{MW>3amG3+&I(;Tx1q16#X0;7 zC_62lNB$}6lsM1%1>~Q_|AJURegXN5l)n)L_e)}#xj$zW#LDf6{39#z@cBoWS)sql zp-|kWFLI-xdxkI_%c(W2Le@*!R^6&PcTE|f8&)eoWvA&G4fD3uG#i#@AX#y?+Q8N~ z+tOOEz2~J+(eA7hnh%Qo^o(k`vfh$b)wG>Tb*oypym3FfZkeJ&On5n!(~^_uNeiSOW-1Y#guM|kob0L)sXruPu{bVqYFSHBrW$BuRhkhSI(SkZ(S_d>0k7b`K$AUEf?%_6c-Ej1DJ?IH;>yiVt?tyCwwI{4Kw*1x6??PQsS75pY}8vf&3c6wOsZm=>lU@( z|-ERm--)i`i(`EqkeXuffZYMqw|t+LmMn zF)s6-T`}9WC7va$YRg`1Z+Ur6hC68r-8_i^d`Fi{h(<18NpcZOL5?H#;?-8OW6Me4 za+*Mlzzl)I1dagUOL6W|$~Z>gIDvTrCkUJ*aEicb0%r)&uoYMYHoz@5`vEFlCQt;} zC$AZNbLmv1+1#-DIF3^@yEx|D|0#|kWq3B2#dWMMiQYcTOPDUhpS&{7Jgwe~3RBsGn9AQGF zdy|gtCSAi#xoJ1!X5E~dcgNgucfy?%nP1KJrbM%uv;cgDafmM9vy4N00YAWajPW_fb#&e9HU_4JaV`C7i#QC@%Px$eqA5Z!5G=%QS2Z`S%Q9Da2(w$|od%!VJ zKjj`6A$FrAdJ*CX34|nqfsjH-BV-V=2swm2!WhCh!UV!3!W6+0;pPRZg~2-7q5_Pcsjl5IBoldr6pg+EJCI(72=XqnU}(vR!UQ* zs1dTIyu7auD@xOLt-V@+pjs?07mLd)%PUKMuiPPtutjHixx3i6QMqJsN8zaS`ttG$ z*n#L-Y*u+M7j$zC<04hF3LT-;5Me`lXysB+NSI#;GFDQhKuN6y2xyerf}&CgX&@g9 z$yY!RrRHacq{Wc57?Kvlri)?QMXnLpCs%0Y6I^Q`k37St321*@vEoVbM?5|&9P(-2eIiLkcNehTYRz`XxiF8-=wmQSd<@nh4kwnV zv^oxm3i(28o2`jgQF)v9*3m%h!HTSJ*)`Z_wq0vfHqCoxtyYLhB4w~01?HU?LErAc zE^f(MwS_vL3hU5$! z%~Ai}p*@=08P*Olw|ZFhLW(ai1#CI_C8hv|Ht5)y zaAC=D3@c6;VU0G|fW-%Uk*I*71{@*}lCYza%nm5AgQ2 zXYh6>} zf$jOfV+-Vbk5{C!qMSz+tOa%UeU~tk0E=!mRU(y3+k8-_g-aTHzYAC1B;uh}shoA; z(T{qNg1{|+kqc5T1d2s@ z4n9X6@Gu&{28Kaug{JR1!jpG>)j~e8K6kl3;4c3eUr>&6m!Ay<<@x;xN?ysIz+J99 zczxUsI`@C;8Qk_lZr}fLaI2jASxmNa4(<4J*^1re9p&78p27F@=N{_wJ`dftyhWd! zjE-;W-_~Ut$jFr;n<%AgiGM{r%sG!{7w4S&NjRVB=RCX+Vsq;babXWLOxuhE04-&^ z%S-wH-0e?rUzLJg2iqw)Y}_Zubh?*>D>?}~&u|UI0`DTHwRM zxAnilAq}a*oZ5qIxbac;)5zVXy{yjWpC^o!7wML5+7)cu$+rn>Q67zSn{Gq4>7Hqu zn%X8W^)`v5ZMvn}rcbs_Uu&C8$!#(v3sL3Q;7%oF7_7Ovmje%Uv(+#Kd5f*~VzXtM ziyN)GOYP20zD!$cR;%kvw`(>#!qxqsSzow^ zi(k3WXbG#n)V(-dt6Fb$aOKT)Kc?QH4RW;a-R9wq>gd9=L5DP`1+c%+#xU*DqS-LJ zttRgNxYzD00bR8wTboSSHKkmrw%S{}$@gTWQ1bGFz7KC^FkLb0rhVJ=4)2+*TpixA zp~7kl7wvRS>C`tm^>y#)h;ro|8(F<=R%`3Es&`^U^&P*`F0PeV$U8DrQ)LHZI{9&> z{?N|sO*1^gXOIOW8JvTqN{S=v zltz|}?q_Xeor~VVy?9e=Y;vGTP3|B$=LO&7vpSv`>Dm`@&;F)%5BlRq z?IX1JS|J8+o^_{#`%k;T#n^ zuJV_;4RT_?#%(Akc8saAi+cV6|_y}J9J>cadUGfb4klg$u+y4_cx@&!|MwsgULxEl9{La-qTk&4HX5#^w zW5R&+QsFZ{FGi33aC^f}%^i+C$fX-bZC>8=$GIof5%{d25vv@N8W}sx)UKL zAL};?5&2!vcv0JtkT8+OWxr3_e8UnZ_2P!r#2%H`{t$KeeKLem?;X_B{%d=Yd)QuLdU?E)R*IRXA$$6R|;GL$J zuQli2dgHS%%y$>l*Z!O1Yi{it4t+ZwPXD+2PB(FSbWh5XRX5}w)mTFDmttxw33(J(Mn#nUn_a3P%EkHsXrzT$yKi&Whe*znmWhwO!1u2ba~4EGQ6z6i6Wa0 zi*w1Z>vi4G-;WrvU)EzkkpDoHj7p_p)|!<{;k;)AuR(d%yn_|>esocd9EyEWTK8nl z!9EYOyriGU^Xny474=YNl0-mj9Y-$`3$y~&lUDWUF^=xllNEAlsw)gW_mV=B>_=ml z6rFq3S6w2kNMMz~bpS8cFy(D8gBPu69^0d9@ogO6N3aiAHi))Oz$QS)ac^R0{?#?I z?Tv>M@8`(BBl5o`@Vf-a7zyW$b$x|;uQmI`PPleG<3*f%J9X9GRB$_F_LpS{bT=;P8efZe5?Ng+=9em diff --git a/nwm_filenames/operational_aws_api/listofnwmfilenames.py b/nwm_filenames/operational_aws_api/listofnwmfilenames.py index e282a79..67456c4 100644 --- a/nwm_filenames/operational_aws_api/listofnwmfilenames.py +++ b/nwm_filenames/operational_aws_api/listofnwmfilenames.py @@ -153,7 +153,7 @@ def select_lead_time(lead_time=None, default=None): def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput: + if urlbaseinput in urlbasedict: return urlbasedict[urlbaseinput] else: return defaulturlbase diff --git a/nwm_filenames/operational_aws_api/test_cases.py b/nwm_filenames/operational_aws_api/test_cases.py index 272c4c4..83242bf 100644 --- a/nwm_filenames/operational_aws_api/test_cases.py +++ b/nwm_filenames/operational_aws_api/test_cases.py @@ -160,7 +160,10 @@ def test_selecturlbase(): "medium_range_blend_coastal_atlgulf", "medium_range_blend_coastal_pacific", "medium_range_coastal_atlgulf_mem1", - "short_range" + "short_range", + "medium_range", + "long_range_mem7", + "medium_range_no_da_mem6" ] import requests @@ -171,11 +174,19 @@ def is_valid_url(url): except requests.ConnectionError: return False + @pytest.mark.parametrize("runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output", [ (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]), (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]), (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]), - + (1, 1, 5, 4, "201809170200", "201809171400", fcst_cycle_values, 4, lead_time_values, ["expected_file_name_4"]), + (2, 2, 4, 5, "201809170800", "201809172000", fcst_cycle_values, 5, lead_time_values, ["expected_file_name_5"]), + (3, 1, 5, 6, "201809171000", "201809172200", fcst_cycle_values, 6, lead_time_values, ["expected_file_name_6"]), + (4, 2, 5, 7, "201809171200", "201809172400", fcst_cycle_values, 7, lead_time_values, ["expected_file_name_7"]), + (5, 5, 1, 8, "201809171400", "201809172600", fcst_cycle_values, 8, lead_time_values, ["expected_file_name_8"]), + (6, 1, 16, 9, "201809171600", "201809172800", fcst_cycle_values, 9, lead_time_values, ["expected_file_name_9"]), + (8, 5, 3, 12, "201809172200", "201809173400", fcst_cycle_values, 12, lead_time_values, ["expected_file_name_12"]), + (11, 1, 3, 18, "201809173400", "201809174600", fcst_cycle_values, 18, lead_time_values, ["expected_file_name_18"]), ]) def test_create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output): file_list = create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time) From 7c85e4a73b6d58beaee0f704e8b35da269e9c070 Mon Sep 17 00:00:00 2001 From: RohanSunkarapalli <58287801+RohanSunkarapalli@users.noreply.github.com> Date: Thu, 19 Oct 2023 03:31:19 -0500 Subject: [PATCH 16/17] Delete nwm_filenames/operational_aws/.ipynb_checkpoints directory --- .../listofnwmfilenames-checkpoint.py | 441 ------------------ .../test_cases-checkpoint.py | 207 -------- 2 files changed, 648 deletions(-) delete mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py delete mode 100644 nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py deleted file mode 100644 index 90829e7..0000000 --- a/nwm_filenames/operational_aws/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py +++ /dev/null @@ -1,441 +0,0 @@ -from gevent import monkey -monkey.patch_all() -from dateutil import rrule -from datetime import datetime, timezone -from itertools import product -from filename_helpers import check_valid_urls -import time - -rundict = { - 1: "short_range", - 2: "medium_range", - 3: "medium_range_no_da", - 4: "long_range", - 5: "analysis_assim", - 6: "analysis_assim_extend", - 7: "analysis_assim_extend_no_da", - 8: "analysis_assim_long", - 9: "analysis_assim_long_no_da", - 10: "analysis_assim_no_da", - 11: "short_range_no_da", -} -memdict = { - 1: "mem_1", - 2: "mem_2", - 3: "mem_3", - 4: "mem_4", - 5: "mem_5", - 6: "mem_6", - 7: "mem_7", -} -vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} -geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} - - -def selectvar(vardict, varinput): - return vardict[varinput] - - -def selectgeo(geodict, geoinput): - return geodict[geoinput] - - -def selectrun(rundict, runinput): - return rundict[runinput] - - -def makename( - date, - run_name, - var_name, - fcst_cycle, - fcst_hour, - geography, - run_type, - fhprefix="", - runsuffix="", - varsuffix="", - run_typesuffix="", - urlbase_prefix="", -): - """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" - - datetxt = f"nwm.{date.strftime('%Y%m%d')}" - foldertxt = f"{run_type}{run_typesuffix}" - filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" - return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" - - -# setting run_type -def run_type(runinput, varinput, geoinput, default=""): - """This function takes the numeric command line input and converts to the text used in the url.""" - - if varinput == 5: # if forcing - if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii - return "forcing_analysis_assim_hawaii" - elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico - return "forcing_analysis_assim_puertorico" - elif runinput == 1 and geoinput == 2: # if short range and hawaii - return "forcing_short_range_hawaii" - elif runinput == 1 and geoinput == 3: # if short range and puerto rico - return "forcing_short_range_puertorico" - elif runinput == 5: # if analysis assim - return "forcing_analysis_assim" - elif runinput == 6: # if analysis_assim_extend - return "forcing_analysis_assim_extend" - elif runinput == 2: # if medium_range - return "forcing_medium_range" - elif runinput == 1: # if short range - return "forcing_short_range" - - elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico - return "analysis_assim_puertorico" - - elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico - return "analysis_assim_puertorico_no_da" - - elif runinput == 1 and geoinput == 3: # if short_range and puerto rico - return "short_range_puertorico" - - elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico - return "short_range_puertorico_no_da" - - else: - return default - - -def fhprefix(runinput): - if 4 <= runinput <= 10: - return "tm" - return "f" - - -def varsuffix(meminput): - if meminput in range(1, 8): - return f"_{meminput}" - else: - return "" - - -def run_typesuffix(meminput): - if meminput in range(1, 8): - return f"_mem{meminput}" - else: - return "" - - -def select_forecast_cycle(fcst_cycle=None, default=None): - if fcst_cycle: - return fcst_cycle - else: - return default - - -def select_lead_time(lead_time=None, default=None): - if lead_time: - return lead_time - else: - return default - - -urlbasedict = { - 0: "", - 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", - 3: "https://storage.googleapis.com/national-water-model/", - 4: "https://storage.cloud.google.com/national-water-model/", - 5: "gs://national-water-model/", - 6: "gcs://national-water-model/", - 7: "https://noaa-nwm-pds.s3.amazonaws.com/", -} - - -def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput in urlbasedict: - return urlbasedict[urlbaseinput] - else: - return defaulturlbase - - -def create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date=None, - end_date=None, - fcst_cycle=None, - urlbaseinput=None, - lead_time=None, # TODO: change this order; placed here to avoid breaking change -): - """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" - - runsuff = "" - - try: - geography = selectgeo(geodict, geoinput) - except: - geography = "geography_error" - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - var_name = selectvar(vardict, varinput) - except: - var_name = "variable_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - try: - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - except: - today = datetime.now(timezone.utc) - _dtstart = today - _until = today - - dates = rrule.rrule( - rrule.DAILY, - dtstart=_dtstart, - until=_until, - ) - run_t = run_type(runinput, varinput, geoinput, run_name) - fhp = fhprefix(runinput) - vsuff = varsuffix(meminput) - rtsuff = run_typesuffix(meminput) - - if runinput == 1: # if short_range - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - elif geoinput == 3: # puertorico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [6]), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif geoinput == 3: # if puerto rico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif runinput == 2: # if medium_range - if varinput == 5: # if forcing - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 19, 6)), - select_lead_time(lead_time, range(1, 241)), - ) - else: - default_fc = range(0, 19, 6) - if meminput == 1: - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 241)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 241, 3)), - ) - else: - raise ValueError("varinput") - elif meminput in range(2, 8): - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 205)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 205, 3)), - ) - else: - raise ValueError("varinput") - else: - raise ValueError("meminput") - elif runinput == 3: # if medium_range_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 6)), - select_lead_time(lead_time, range(3, 240, 3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 4: # if long_range - default_fc = range(0, 19, 6) - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(6, 721, 6)), - ) - elif varinput == 2: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(24, 721, 24)), - ) - else: - raise ValueError("varinput") - elif runinput == 5: # if analysis_assim (simplest form) - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(19)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(20)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(3)), - ) - elif runinput == 6: # if analysis_assim_extend - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - elif runinput == 7: # if analysis_assim_extend_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 8: # if analysis_assim_long - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - elif runinput == 9: # if analysis_assim_long_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 10: # if analysis_assim_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(21)), - select_lead_time(lead_time, range(3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - else: - raise ValueError("run error") - - r = [] - for _dt, _fc, _fh in prod: - r.append( - makename( - _dt, - run_name, - var_name, - _fc, - _fh, - geography, - run_t, - fhp, - runsuff, - vsuff, - rtsuff, - urlbase_prefix, - ) - ) - return r - - -def main(): - - - start_date = "201809170000" - end_date = "201809172300" - fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] - lead_time = [1, 2, 240] - # fcst_cycle = None # Retrieves a full day for each day within the range given. - runinput = 1 - varinput = 1 - geoinput = 1 - meminput = 0 - urlbaseinput = 3 - - file_list = create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date, - end_date, - fcst_cycle, - urlbaseinput, - ) - if len(file_list) == 0: - print(f"No files found") - else: - print(f"Files: {file_list}\nTotal files: {len(file_list)}") - valid_file_list = check_valid_urls(file_list) - print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") - - with open("filenamelist.txt", "w") as file: - for item in valid_file_list: - file.write(f"{item}\n") - - -if __name__ == "__main__": - start = time.time() - main() - print(time.time() - start) diff --git a/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py b/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py deleted file mode 100644 index 83242bf..0000000 --- a/nwm_filenames/operational_aws/.ipynb_checkpoints/test_cases-checkpoint.py +++ /dev/null @@ -1,207 +0,0 @@ -import pytest -from datetime import datetime -from listofnwmfilenames import ( - selectvar, - selectgeo, - selectrun, - makename, - run_type, - fhprefix, - varsuffix, - run_typesuffix, - select_forecast_cycle, - select_lead_time, - selecturlbase, - create_file_list, -) - - -def test_selectvar(): - assert selectvar({1: "channel_rt"}, 1) == "channel_rt" - - -def test_selectgeo(): - assert selectgeo({1: "conus"}, 1) == "conus" - - -def test_selectrun(): - assert selectrun({1: "short_range"}, 1) == "short_range" - - -def test_makename(): - assert makename( - datetime(2022, 1, 1, 0, 0, 0, 0), - "short_range", - "channel_rt", - 0, - 1, - "conus", - "forcing", - fhprefix="f", - runsuffix="_test", - varsuffix="_test", - run_typesuffix="_test", - urlbase_prefix="https://example.com/", - ) == "https://example.com/nwm.20220101/forcing_test/nwm.t00z.short_range_test.channel_rt_test.f001.conus.nc" - -@pytest.mark.parametrize("runinput, varinput, geoinput, expected_output", [ - (5, 5, 2, "forcing_analysis_assim_hawaii"), - (5, 5, 3, "forcing_analysis_assim_puertorico"), - (2, 5, 7, "forcing_medium_range"), - (1, 5, 7, "forcing_short_range"), - (1, 3, 3, "short_range_puertorico"), - (1, 5, 2, "forcing_short_range_hawaii"), - (1, 5, 3, "forcing_short_range_puertorico"), - (5, 5, 7, "forcing_analysis_assim"), - (6, 5, 7, "forcing_analysis_assim_extend"), - (5, 3, 3, "analysis_assim_puertorico"), - (10, 3, 3, "analysis_assim_puertorico_no_da"), - (1, 3, 3, "short_range_puertorico"), - (11, 3, 3, "short_range_puertorico_no_da"), - (2, 2, 2, "default_value") # Add a test case for default value -]) -def test_run_type(runinput, varinput, geoinput, expected_output): - assert run_type(runinput, varinput, geoinput, "default_value") == expected_output - - -def test_fhprefix(): - assert fhprefix(5) == "tm" - assert fhprefix(1) == "f" - assert fhprefix(10) == "tm" - - -def test_varsuffix(): - assert varsuffix(1) == "_1" - assert varsuffix(7) == "_7" - assert varsuffix(8) == "" - - -def test_run_typesuffix(): - assert run_typesuffix(1) == "_mem1" - assert run_typesuffix(7) == "_mem7" - assert run_typesuffix(8) == "" - - -def test_select_forecast_cycle(): - assert select_forecast_cycle(12, 0) == 12 - assert select_forecast_cycle(None, 0) == 0 - - -def test_select_lead_time(): - assert select_lead_time(240, 0) == 240 - assert select_lead_time(None, 0) == 0 - - -def test_selecturlbase(): - assert selecturlbase({1: "https://example.com/"}, 1) == "https://example.com/" - assert selecturlbase({1: "https://example.com/"}, 2, "default") == "default" - -fcst_cycle_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] -lead_time_values = [1, 2, 240] -valid_base_urls = [ - "", - "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - "https://storage.googleapis.com/national-water-model/", - "https://storage.cloud.google.com/national-water-model/", - "gs://national-water-model/", - "gcs://national-water-model/", - "https://noaa-nwm-pds.s3.amazonaws.com/", - "https://ciroh-nwm-zarr-copy.s3.amazonaws.com/national-water-model/", -] - -valid_folder_names = [ - "analysis_assim", - "analysis_assim_alaska", - "analysis_assim_alaska_no_da", - "analysis_assim_coastal_atlgulf", - "analysis_assim_coastal_hawaii", - "analysis_assim_coastal_pacific", - "analysis_assim_coastal_puertorico", - "analysis_assim_extend", - "analysis_assim_extend_alaska", - "analysis_assim_extend_alaska_no_da", - "analysis_assim_extend_coastal_atlgulf", - "analysis_assim_extend_coastal_pacific", - "analysis_assim_extend_no_da", - "analysis_assim_hawaii", - "analysis_assim_hawaii_no_da", - "analysis_assim_long", - "analysis_assim_long_no_da", - "analysis_assim_no_da", - "analysis_assim_puertorico", - "analysis_assim_puertorico_no_da", - "forcing_analysis_assim", - "forcing_analysis_assim_alaska", - "forcing_analysis_assim_extend", - "forcing_analysis_assim_extend_alaska", - "forcing_analysis_assim_hawaii", - "forcing_analysis_assim_puertorico", - "forcing_medium_range", - "forcing_medium_range_alaska", - "forcing_medium_range_blend", - "forcing_medium_range_blend_alaska", - "forcing_short_range", - "forcing_short_range_alaska", - "forcing_short_range_hawaii", - "forcing_short_range_puertorico", - "long_range_mem1", - "long_range_mem2", - "long_range_mem3", - "long_range_mem4", - "medium_range_alaska_mem1", - "medium_range_alaska_mem2", - "medium_range_alaska_mem3", - "medium_range_alaska_mem4", - "medium_range_alaska_mem5", - "medium_range_alaska_mem6", - "medium_range_alaska_no_da", - "medium_range_blend", - "medium_range_blend_alaska", - "medium_range_blend_coastal_atlgulf", - "medium_range_blend_coastal_pacific", - "medium_range_coastal_atlgulf_mem1", - "short_range", - "medium_range", - "long_range_mem7", - "medium_range_no_da_mem6" -] -import requests - -def is_valid_url(url): - try: - response = requests.head(url) - return response.status_code < 400 - except requests.ConnectionError: - return False - - -@pytest.mark.parametrize("runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output", [ - (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]), - (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]), - (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]), - (1, 1, 5, 4, "201809170200", "201809171400", fcst_cycle_values, 4, lead_time_values, ["expected_file_name_4"]), - (2, 2, 4, 5, "201809170800", "201809172000", fcst_cycle_values, 5, lead_time_values, ["expected_file_name_5"]), - (3, 1, 5, 6, "201809171000", "201809172200", fcst_cycle_values, 6, lead_time_values, ["expected_file_name_6"]), - (4, 2, 5, 7, "201809171200", "201809172400", fcst_cycle_values, 7, lead_time_values, ["expected_file_name_7"]), - (5, 5, 1, 8, "201809171400", "201809172600", fcst_cycle_values, 8, lead_time_values, ["expected_file_name_8"]), - (6, 1, 16, 9, "201809171600", "201809172800", fcst_cycle_values, 9, lead_time_values, ["expected_file_name_9"]), - (8, 5, 3, 12, "201809172200", "201809173400", fcst_cycle_values, 12, lead_time_values, ["expected_file_name_12"]), - (11, 1, 3, 18, "201809173400", "201809174600", fcst_cycle_values, 18, lead_time_values, ["expected_file_name_18"]), -]) -def test_create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output): - file_list = create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time) - assert isinstance(file_list, list) - assert all(isinstance(file_name, str) for file_name in file_list) - for url in file_list: - # assert is_valid_url(url), f"Invalid URL: {url}" - assert any(substring in url for substring in valid_folder_names), f"No valid folder name found in URL: {url}" - - - # Check if all base URLs exist in the predefined list - for url in file_list: - assert any(url.startswith(base_url) for base_url in valid_base_urls), f"Invalid base URL in generated URL: {url}" - - - -if __name__ == "__main__": - pytest.main() \ No newline at end of file From 59ddc504db3ee6f656cb0b5f83c69cf7b6d595d4 Mon Sep 17 00:00:00 2001 From: RohanSunkarapalli <58287801+RohanSunkarapalli@users.noreply.github.com> Date: Thu, 19 Oct 2023 03:31:35 -0500 Subject: [PATCH 17/17] Delete nwm_filenames/operational_aws_api/.ipynb_checkpoints directory --- .../listofnwmfilenames-checkpoint.py | 493 ------------------ .../test_cases-checkpoint.py | 207 -------- 2 files changed, 700 deletions(-) delete mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py delete mode 100644 nwm_filenames/operational_aws_api/.ipynb_checkpoints/test_cases-checkpoint.py diff --git a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py b/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py deleted file mode 100644 index 67456c4..0000000 --- a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/listofnwmfilenames-checkpoint.py +++ /dev/null @@ -1,493 +0,0 @@ -from gevent import monkey -monkey.patch_all() -import gevent -from dateutil import rrule -from datetime import datetime, timezone -from itertools import product -#from filename_helpers import check_valid_urls -import time -import requests - -rundict = { - 1: "short_range", - 2: "medium_range", - 3: "medium_range_no_da", - 4: "long_range", - 5: "analysis_assim", - 6: "analysis_assim_extend", - 7: "analysis_assim_extend_no_da", - 8: "analysis_assim_long", - 9: "analysis_assim_long_no_da", - 10: "analysis_assim_no_da", - 11: "short_range_no_da", -} -memdict = { - 1: "mem_1", - 2: "mem_2", - 3: "mem_3", - 4: "mem_4", - 5: "mem_5", - 6: "mem_6", - 7: "mem_7", -} -vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"} -geodict = {1: "conus", 2: "hawaii", 3: "puertorico"} - - -def selectvar(vardict, varinput): - return vardict[varinput] - - -def selectgeo(geodict, geoinput): - return geodict[geoinput] - - -def selectrun(rundict, runinput): - return rundict[runinput] - - -def makename( - date, - run_name, - var_name, - fcst_cycle, - fcst_hour, - geography, - run_type, - fhprefix="", - runsuffix="", - varsuffix="", - run_typesuffix="", - urlbase_prefix="", -): - """This function handles preprocessed text and converts it into the applicable url to access the appropriate file.""" - - datetxt = f"nwm.{date.strftime('%Y%m%d')}" - foldertxt = f"{run_type}{run_typesuffix}" - filetxt = f"nwm.t{fcst_cycle:02d}z.{run_name}{runsuffix}.{var_name}{varsuffix}.{fhprefix}{fcst_hour:03d}.{geography}.nc" - return f"{urlbase_prefix}{datetxt}/{foldertxt}/{filetxt}" - - -# setting run_type -def run_type(runinput, varinput, geoinput, default=""): - """This function takes the numeric command line input and converts to the text used in the url.""" - - if varinput == 5: # if forcing - if runinput == 5 and geoinput == 2: # if analysis_assim and hawaii - return "forcing_analysis_assim_hawaii" - elif runinput == 5 and geoinput == 3: # if analysis_assim and puerto rico - return "forcing_analysis_assim_puertorico" - elif runinput == 1 and geoinput == 2: # if short range and hawaii - return "forcing_short_range_hawaii" - elif runinput == 1 and geoinput == 3: # if short range and puerto rico - return "forcing_short_range_puertorico" - elif runinput == 5: # if analysis assim - return "forcing_analysis_assim" - elif runinput == 6: # if analysis_assim_extend - return "forcing_analysis_assim_extend" - elif runinput == 2: # if medium_range - return "forcing_medium_range" - elif runinput == 1: # if short range - return "forcing_short_range" - - elif runinput == 5 and geoinput == 3: # if analysis_assim and puertorico - return "analysis_assim_puertorico" - - elif runinput == 10 and geoinput == 3: # if analysis_assim_no_da and puertorico - return "analysis_assim_puertorico_no_da" - - elif runinput == 1 and geoinput == 3: # if short_range and puerto rico - return "short_range_puertorico" - - elif runinput == 11 and geoinput == 3: # if short_range_no_da and puerto rico - return "short_range_puertorico_no_da" - - else: - return default - - -def fhprefix(runinput): - if 4 <= runinput <= 10: - return "tm" - return "f" - - -def varsuffix(meminput): - if meminput in range(1, 8): - return f"_{meminput}" - else: - return "" - - -def run_typesuffix(meminput): - if meminput in range(1, 8): - return f"_mem{meminput}" - else: - return "" - - -def select_forecast_cycle(fcst_cycle=None, default=None): - if fcst_cycle: - return fcst_cycle - else: - return default - - -def select_lead_time(lead_time=None, default=None): - if lead_time: - return lead_time - else: - return default - - -urlbasedict = { - 0: "", - 1: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - 2: "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/post-processed/WMS/", - 3: "https://storage.googleapis.com/national-water-model/", - 4: "https://storage.cloud.google.com/national-water-model/", - 5: "gs://national-water-model/", - 6: "gcs://national-water-model/", - 7: "https://noaa-nwm-pds.s3.amazonaws.com/", -} - - -def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""): - if urlbaseinput in urlbasedict: - return urlbasedict[urlbaseinput] - else: - return defaulturlbase - - -def create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date=None, - end_date=None, - fcst_cycle=None, - urlbaseinput=None, - lead_time=None, # TODO: change this order; placed here to avoid breaking change -): - """for given date, run, var, fcst_cycle, and geography, print file names for the valid time (the range of fcst_hours) and dates""" - - runsuff = "" - - try: - geography = selectgeo(geodict, geoinput) - except: - geography = "geography_error" - try: - run_name = selectrun(rundict, runinput) - except: - run_name = "run_error" - try: - var_name = selectvar(vardict, varinput) - except: - var_name = "variable_error" - try: - urlbase_prefix = selecturlbase(urlbasedict, urlbaseinput) - except: - urlbase_prefix = "urlbase_error" - - try: - _dtstart = datetime.strptime(start_date, "%Y%m%d%H%M") - _until = datetime.strptime(end_date, "%Y%m%d%H%M") - except: - today = datetime.now(timezone.utc) - _dtstart = today - _until = today - - dates = rrule.rrule( - rrule.DAILY, - dtstart=_dtstart, - until=_until, - ) - run_t = run_type(runinput, varinput, geoinput, run_name) - fhp = fhprefix(runinput) - vsuff = varsuffix(meminput) - rtsuff = run_typesuffix(meminput) - - if runinput == 1: # if short_range - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - elif geoinput == 3: # puertorico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [6]), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif geoinput == 3: # if puerto rico - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 48)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(1, 19)), - ) - elif runinput == 2: # if medium_range - if varinput == 5: # if forcing - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 19, 6)), - select_lead_time(lead_time, range(1, 241)), - ) - else: - default_fc = range(0, 19, 6) - if meminput == 1: - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 241)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 241, 3)), - ) - else: - raise ValueError("varinput") - elif meminput in range(2, 8): - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(1, 205)), - ) - elif varinput in {2, 4}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(3, 205, 3)), - ) - else: - raise ValueError("varinput") - else: - raise ValueError("meminput") - elif runinput == 3: # if medium_range_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 13, 6)), - select_lead_time(lead_time, range(3, 240, 3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 4: # if long_range - default_fc = range(0, 19, 6) - if varinput in {1, 3}: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(6, 721, 6)), - ) - elif varinput == 2: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, default_fc), - select_lead_time(lead_time, range(24, 721, 24)), - ) - else: - raise ValueError("varinput") - elif runinput == 5: # if analysis_assim (simplest form) - if varinput == 5: # if forcing - if geoinput == 2: # hawaii - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(19)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(20)), - select_lead_time(lead_time, range(3)), - ) - else: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(24)), - select_lead_time(lead_time, range(3)), - ) - elif runinput == 6: # if analysis_assim_extend - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - elif runinput == 7: # if analysis_assim_extend_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, [16]), - select_lead_time(lead_time, range(28)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - elif runinput == 8: # if analysis_assim_long - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - elif runinput == 9: # if analysis_assim_long_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(0, 24, 6)), - select_lead_time(lead_time, range(12)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 10: # if analysis_assim_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(21)), - select_lead_time(lead_time, range(3)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - - elif runinput == 11 and geoinput == 3: # if short_range_puertorico_no_da - if varinput == 1: - prod = product( - dates, - select_forecast_cycle(fcst_cycle, range(6, 19, 12)), - select_lead_time(lead_time, range(1, 49)), - ) - else: - raise ValueError("only valid variable for a _no_da type run is channel_rt") - else: - raise ValueError("run error") - - r = [] - for _dt, _fc, _fh in prod: - r.append( - makename( - _dt, - run_name, - var_name, - _fc, - _fh, - geography, - run_t, - fhp, - runsuff, - vsuff, - rtsuff, - urlbase_prefix, - ) - ) - return r - - -def main(): - - start_date = "201809170000" - end_date = "201809172300" - fcst_cycle = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] - lead_time = [1, 2, 240] - # fcst_cycle = None # Retrieves a full day for each day within the range given. - runinput = 1 - varinput = 5 - geoinput = 1 - meminput = 0 - urlbaseinput = 3 - - file_list = create_file_list( - runinput, - varinput, - geoinput, - meminput, - start_date, - end_date, - fcst_cycle, - urlbaseinput, - ) - if len(file_list) == 0: - print(f"No files found") - else: - #print(f"Files: {file_list}\nTotal files: {len(file_list)}") - paths = link_search_algo(file_list) - tasks = [gevent.spawn(check_directory,path) for path in paths] - gevent.joinall(tasks) - all_links = [] - for task in tasks: - l = task.value - for link in l: - if "https://storage.googleapis.com/national-water-model/"+link in file_list: - all_links.append("https://storage.googleapis.com/national-water-model/"+link) - - print(f"{len(all_links)} files found") - #valid_file_list = check_valid_urls(file_list) - #print(f"Valid Files: {valid_file_list}\nValid files: {len(valid_file_list)}") - - with open("filenamelist.txt", "w") as file: - for item in all_links: - file.write(f"{item}\n") - print("Success") - -def link_search_algo(file_list): - paths = [] - for file_name in file_list: - text_sp = file_name.split("/") - path = "/".join(text_sp[-3:-1])+"/" - if not path in paths: - paths.append(path) - return paths - -DOWNLOAD_LINK = "https://storage.googleapis.com/download/storage/v1/b/national-water-model/o/" -URL = "https://storage.googleapis.com/storage/v1/b/national-water-model/o?delimiter=/&prefix=" -TOKEN_PREFIX = "&pageToken=" - -def check_directory(path): - try: - json = requests.get(URL+path).json() - except Exception as e: - print(f"Couldn't fetch {URL}{path}") - try: - found_files = [i["name"] for i in json["items"]] - except: - return [] - - if "nextPageToken" in json: - return loop_check(URL+path,json["nextPageToken"]) + found_files - return found_files - -def loop_check(url,token): - try: - json = requests.get(url+TOKEN_PREFIX+token).json() - except Exception as e: - print(f"Couldn't fetch {url}") - - found_files = [i["name"] for i in json["items"]] - - if "nextPageToken" in json: - return loop_check(url,json["nextPageToken"]) + found_files - return found_files - - -if __name__ == "__main__": - start = time.time() - main() - print(time.time() - start) diff --git a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/test_cases-checkpoint.py b/nwm_filenames/operational_aws_api/.ipynb_checkpoints/test_cases-checkpoint.py deleted file mode 100644 index 83242bf..0000000 --- a/nwm_filenames/operational_aws_api/.ipynb_checkpoints/test_cases-checkpoint.py +++ /dev/null @@ -1,207 +0,0 @@ -import pytest -from datetime import datetime -from listofnwmfilenames import ( - selectvar, - selectgeo, - selectrun, - makename, - run_type, - fhprefix, - varsuffix, - run_typesuffix, - select_forecast_cycle, - select_lead_time, - selecturlbase, - create_file_list, -) - - -def test_selectvar(): - assert selectvar({1: "channel_rt"}, 1) == "channel_rt" - - -def test_selectgeo(): - assert selectgeo({1: "conus"}, 1) == "conus" - - -def test_selectrun(): - assert selectrun({1: "short_range"}, 1) == "short_range" - - -def test_makename(): - assert makename( - datetime(2022, 1, 1, 0, 0, 0, 0), - "short_range", - "channel_rt", - 0, - 1, - "conus", - "forcing", - fhprefix="f", - runsuffix="_test", - varsuffix="_test", - run_typesuffix="_test", - urlbase_prefix="https://example.com/", - ) == "https://example.com/nwm.20220101/forcing_test/nwm.t00z.short_range_test.channel_rt_test.f001.conus.nc" - -@pytest.mark.parametrize("runinput, varinput, geoinput, expected_output", [ - (5, 5, 2, "forcing_analysis_assim_hawaii"), - (5, 5, 3, "forcing_analysis_assim_puertorico"), - (2, 5, 7, "forcing_medium_range"), - (1, 5, 7, "forcing_short_range"), - (1, 3, 3, "short_range_puertorico"), - (1, 5, 2, "forcing_short_range_hawaii"), - (1, 5, 3, "forcing_short_range_puertorico"), - (5, 5, 7, "forcing_analysis_assim"), - (6, 5, 7, "forcing_analysis_assim_extend"), - (5, 3, 3, "analysis_assim_puertorico"), - (10, 3, 3, "analysis_assim_puertorico_no_da"), - (1, 3, 3, "short_range_puertorico"), - (11, 3, 3, "short_range_puertorico_no_da"), - (2, 2, 2, "default_value") # Add a test case for default value -]) -def test_run_type(runinput, varinput, geoinput, expected_output): - assert run_type(runinput, varinput, geoinput, "default_value") == expected_output - - -def test_fhprefix(): - assert fhprefix(5) == "tm" - assert fhprefix(1) == "f" - assert fhprefix(10) == "tm" - - -def test_varsuffix(): - assert varsuffix(1) == "_1" - assert varsuffix(7) == "_7" - assert varsuffix(8) == "" - - -def test_run_typesuffix(): - assert run_typesuffix(1) == "_mem1" - assert run_typesuffix(7) == "_mem7" - assert run_typesuffix(8) == "" - - -def test_select_forecast_cycle(): - assert select_forecast_cycle(12, 0) == 12 - assert select_forecast_cycle(None, 0) == 0 - - -def test_select_lead_time(): - assert select_lead_time(240, 0) == 240 - assert select_lead_time(None, 0) == 0 - - -def test_selecturlbase(): - assert selecturlbase({1: "https://example.com/"}, 1) == "https://example.com/" - assert selecturlbase({1: "https://example.com/"}, 2, "default") == "default" - -fcst_cycle_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] -lead_time_values = [1, 2, 240] -valid_base_urls = [ - "", - "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/", - "https://storage.googleapis.com/national-water-model/", - "https://storage.cloud.google.com/national-water-model/", - "gs://national-water-model/", - "gcs://national-water-model/", - "https://noaa-nwm-pds.s3.amazonaws.com/", - "https://ciroh-nwm-zarr-copy.s3.amazonaws.com/national-water-model/", -] - -valid_folder_names = [ - "analysis_assim", - "analysis_assim_alaska", - "analysis_assim_alaska_no_da", - "analysis_assim_coastal_atlgulf", - "analysis_assim_coastal_hawaii", - "analysis_assim_coastal_pacific", - "analysis_assim_coastal_puertorico", - "analysis_assim_extend", - "analysis_assim_extend_alaska", - "analysis_assim_extend_alaska_no_da", - "analysis_assim_extend_coastal_atlgulf", - "analysis_assim_extend_coastal_pacific", - "analysis_assim_extend_no_da", - "analysis_assim_hawaii", - "analysis_assim_hawaii_no_da", - "analysis_assim_long", - "analysis_assim_long_no_da", - "analysis_assim_no_da", - "analysis_assim_puertorico", - "analysis_assim_puertorico_no_da", - "forcing_analysis_assim", - "forcing_analysis_assim_alaska", - "forcing_analysis_assim_extend", - "forcing_analysis_assim_extend_alaska", - "forcing_analysis_assim_hawaii", - "forcing_analysis_assim_puertorico", - "forcing_medium_range", - "forcing_medium_range_alaska", - "forcing_medium_range_blend", - "forcing_medium_range_blend_alaska", - "forcing_short_range", - "forcing_short_range_alaska", - "forcing_short_range_hawaii", - "forcing_short_range_puertorico", - "long_range_mem1", - "long_range_mem2", - "long_range_mem3", - "long_range_mem4", - "medium_range_alaska_mem1", - "medium_range_alaska_mem2", - "medium_range_alaska_mem3", - "medium_range_alaska_mem4", - "medium_range_alaska_mem5", - "medium_range_alaska_mem6", - "medium_range_alaska_no_da", - "medium_range_blend", - "medium_range_blend_alaska", - "medium_range_blend_coastal_atlgulf", - "medium_range_blend_coastal_pacific", - "medium_range_coastal_atlgulf_mem1", - "short_range", - "medium_range", - "long_range_mem7", - "medium_range_no_da_mem6" -] -import requests - -def is_valid_url(url): - try: - response = requests.head(url) - return response.status_code < 400 - except requests.ConnectionError: - return False - - -@pytest.mark.parametrize("runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output", [ - (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]), - (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]), - (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]), - (1, 1, 5, 4, "201809170200", "201809171400", fcst_cycle_values, 4, lead_time_values, ["expected_file_name_4"]), - (2, 2, 4, 5, "201809170800", "201809172000", fcst_cycle_values, 5, lead_time_values, ["expected_file_name_5"]), - (3, 1, 5, 6, "201809171000", "201809172200", fcst_cycle_values, 6, lead_time_values, ["expected_file_name_6"]), - (4, 2, 5, 7, "201809171200", "201809172400", fcst_cycle_values, 7, lead_time_values, ["expected_file_name_7"]), - (5, 5, 1, 8, "201809171400", "201809172600", fcst_cycle_values, 8, lead_time_values, ["expected_file_name_8"]), - (6, 1, 16, 9, "201809171600", "201809172800", fcst_cycle_values, 9, lead_time_values, ["expected_file_name_9"]), - (8, 5, 3, 12, "201809172200", "201809173400", fcst_cycle_values, 12, lead_time_values, ["expected_file_name_12"]), - (11, 1, 3, 18, "201809173400", "201809174600", fcst_cycle_values, 18, lead_time_values, ["expected_file_name_18"]), -]) -def test_create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output): - file_list = create_file_list(runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time) - assert isinstance(file_list, list) - assert all(isinstance(file_name, str) for file_name in file_list) - for url in file_list: - # assert is_valid_url(url), f"Invalid URL: {url}" - assert any(substring in url for substring in valid_folder_names), f"No valid folder name found in URL: {url}" - - - # Check if all base URLs exist in the predefined list - for url in file_list: - assert any(url.startswith(base_url) for base_url in valid_base_urls), f"Invalid base URL in generated URL: {url}" - - - -if __name__ == "__main__": - pytest.main() \ No newline at end of file