diff --git a/CHANGELOG.md b/CHANGELOG.md index 74b032f..80bf398 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - This naming schema also enables the development of datetime filters as the descriptor will apply to the latest datetime within the provided time frame filter, whereas most recent indicates np filters. - removed sites that are not in New Mexico +### Fixed +- removed records from USGS where the value is "-999999" + ## 0.7.0 diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index 9ac50aa..8a9ccd6 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -162,7 +162,7 @@ def get_county_polygon(name, as_wkt=True): _warning(f"Invalid state. {state}") -def get_state_polygon(state, buffer): +def get_state_polygon(state: str, buffer: int | None = None): statefp = _statelookup(state) if statefp: obj = _get_cached_object( diff --git a/backend/config.py b/backend/config.py index 9f0f2d7..0ec1dd7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -15,7 +15,6 @@ # =============================================================================== import os import sys -import time from datetime import datetime, timedelta import shapely.wkt @@ -29,15 +28,6 @@ NMBGMRAnalyteSource, ) from .connectors.bor.source import BORSiteSource, BORAnalyteSource -from .connectors.ckan import ( - HONDO_RESOURCE_ID, - FORT_SUMNER_RESOURCE_ID, - ROSWELL_RESOURCE_ID, -) -from .connectors.ckan.source import ( - OSERoswellSiteSource, - OSERoswellWaterLevelSource, -) from .connectors.nmenv.source import DWBSiteSource, DWBAnalyteSource from .connectors.nmose.source import NMOSEPODSiteSource from .constants import ( @@ -67,7 +57,6 @@ ISCSevenRiversAnalyteSource, ) from .connectors.st2.source import ( - ST2SiteSource, PVACDSiteSource, PVACDWaterLevelSource, EBIDSiteSource, @@ -120,7 +109,7 @@ class Config(Loggable): end_date: str = "" # spatial - bbox: dict # dict or str + bbox: str = "" county: str = "" wkt: str = "" @@ -166,7 +155,6 @@ def __init__(self, model=None, payload=None): # need to initialize logger super().__init__() - self.bbox = {} if model: if model.wkt: self.wkt = model.wkt diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 39c1170..2ec5d10 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -72,9 +72,6 @@ def get_records(self): if config.has_bounds(): params["wkt"] = config.bounding_wkt() - if config.site_limit: - params["limit"] = config.site_limit - if not config.sites_only: if config.parameter.lower() != "waterlevels": @@ -90,18 +87,18 @@ def get_records(self): ) if not config.sites_only: for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None + # print(f"Obtaining well data for {site['properties']['point_id']}") + # well_data = self._execute_json_request( + # _make_url("wells"), + # params={"pointid": site["properties"]["point_id"]}, + # tag="", + # ) + # site["properties"]["formation"] = well_data["formation"] + # site["properties"]["well_depth"] = 
well_data["well_depth_ftbgs"] # site["properties"]["well_depth_units"] = FEET + site["properties"]["formation"] = None + site["properties"]["well_depth"] = None + site["properties"]["well_depth_units"] = FEET return sites diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index ae62c6b..08b1d68 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -27,8 +27,7 @@ DT_MEASURED, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, - EARLIEST, - LATEST, + TDS, ) from backend.source import get_analyte_search_param, get_terminal_record @@ -46,7 +45,7 @@ def __repr__(self): return "DWBSiteSource" def health(self): - return self.get_records(top=10, analyte="TDS") + return self.get_records(top=10, analyte=TDS) def get_records(self, *args, **kw): diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 92e6fb3..249c7db 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,4 +1,4 @@ -import os +from typing import List, Dict, Any from shapely import wkt from backend.connectors import NM_STATE_BOUNDING_POLYGON @@ -20,12 +20,12 @@ class NMOSEPODSiteSource(BaseSiteSource): """ transformer_klass = NMOSEPODSiteTransformer - chunk_size = 5000 + chunk_size: int = 5000 bounding_polygon = NM_STATE_BOUNDING_POLYGON - def get_records(self, *args, **kw) -> dict: + def get_records(self, *args, **kw) -> List[Dict]: config = self.config - params = {} + params: Dict[str, Any] = {} # if config.has_bounds(): # bbox = config.bbox_bounding_points() # params["bBox"] = ",".join([str(b) for b in bbox]) @@ -37,7 +37,9 @@ def get_records(self, *args, **kw) -> dict: # if config.end_date: # params["endDt"] = config.end_dt.date().isoformat() - url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" + url: str = ( + "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" + ) params["where"] = ( "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" @@ -55,11 +57,14 @@ def get_records(self, *args, **kw) -> dict: params["geometry"] = wkt_to_arcgis_json(wkt) params["geometryType"] = "esriGeometryPolygon" - records = [] + records: List = [] i = 1 while 1: rs = self._execute_json_request(url, params, tag="features") - records.extend(rs) + if rs is None: + continue + else: + records.extend(rs) params["resultOffset"] += self.chunk_size if len(rs) < self.chunk_size: break diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index e60029b..25e4e87 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -181,7 +181,11 @@ def _extract_site_records(self, records, site_record): return [ri for ri in records if ri["site_code"] == site_record.id] def _clean_records(self, records): - return [r for r in records if r["value"] is not None and r["value"].strip()] + return [ + r + for r in records + if r["value"] is not None and r["value"].strip() and r["value"] != "-999999" + ] def _extract_source_parameter_results(self, records): return [float(r["value"]) for r in records] diff --git a/backend/persister.py b/backend/persister.py index 38e8493..a89572c 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -21,7 +21,7 @@ import pandas as pd import geopandas as gpd -from 
backend.logging import Loggable +from backend.logger import Loggable try: from google.cloud import storage diff --git a/backend/source.py b/backend/source.py index b631185..5189258 100644 --- a/backend/source.py +++ b/backend/source.py @@ -15,19 +15,13 @@ # =============================================================================== from json import JSONDecodeError -import click import httpx import shapely.wkt from shapely import MultiPoint -from typing import Union, List +from typing import Union, List, Callable, Dict from backend.constants import ( - MILLIGRAMS_PER_LITER, FEET, - METERS, - PARTS_PER_MILLION, - DTW, - DTW_UNITS, DT_MEASURED, PARAMETER_NAME, PARAMETER_UNITS, @@ -36,7 +30,6 @@ LATEST, ) from backend.logger import Loggable -from backend.persister import BasePersister, CSVPersister from backend.record import ( AnalyteRecord, AnalyteSummaryRecord, @@ -47,7 +40,7 @@ from backend.transformer import BaseTransformer, convert_units -def make_site_list(site_record: list | dict) -> list | str: +def make_site_list(site_record: list[SiteRecord] | SiteRecord) -> list | str: """ Returns a list of site ids, as defined by site_record @@ -67,7 +60,7 @@ def make_site_list(site_record: list | dict) -> list | str: return sites -def get_terminal_record(records: list, tag: Union[str, callable], bookend: str) -> dict: +def get_terminal_record(records: list, tag: Union[str, Callable], bookend: str) -> dict: """ Returns the most recent record based on the tag @@ -106,6 +99,10 @@ def func(x): return sorted(records, key=func)[0] elif bookend == LATEST: return sorted(records, key=func)[-1] + else: + raise ValueError( + f"Invalid bookend {bookend}. Must be either {EARLIEST} or {LATEST}" + ) def get_analyte_search_param(parameter: str, mapping: dict) -> str: @@ -178,11 +175,9 @@ class BaseSource(Loggable): """ transformer_klass = BaseTransformer - config = None - def __init__(self, config=None): + def __init__(self): self.transformer = self.transformer_klass() - self.set_config(config) super().__init__() @property @@ -191,7 +186,7 @@ def tag(self): def set_config(self, config): self.config = config - self.transformer.config = config + self.transformer.set_config(config) def check(self, *args, **kw): return True @@ -205,7 +200,7 @@ def discover(self, *args, **kw): # Methods Already Implemented # ========================================================================== - def _execute_text_request(self, url: str, params=None, **kw) -> str: + def _execute_text_request(self, url: str, params: dict | None = None, **kw) -> str: """ Executes a get request to the provided url and returns the text response. @@ -235,8 +230,8 @@ def _execute_text_request(self, url: str, params=None, **kw) -> str: return "" def _execute_json_request( - self, url: str, params: dict = None, tag: str = None, **kw - ) -> dict: + self, url: str, params: dict | None = None, tag: str | None = None, **kw + ) -> dict | None: """ Executes a get request to the provided url and returns the json response. @@ -256,7 +251,6 @@ def _execute_json_request( dict the json response """ - # print(url) resp = httpx.get(url, params=params, **kw) if tag is None: tag = "data" @@ -269,18 +263,18 @@ def _execute_json_request( return obj except JSONDecodeError: self.warn(f"service responded but with no data. 
\n{resp.text}") - return [] + return None else: self.warn(f"service responded with status {resp.status_code}") self.warn(f"service responded with text {resp.text}") self.warn(f"service at url: {resp.url}") - return [] + return None # ========================================================================== # Methods Implemented in BaseSiteSource and BaseParameterSource # ========================================================================== - def read(self, *args, **kw) -> list: + def read(self, *args, **kw) -> list | None: """ Returns the records. Implemented in BaseSiteSource and BaseAnalyteSource """ @@ -290,7 +284,7 @@ def read(self, *args, **kw) -> list: # Methods That Need to be Implemented For Each Source # ========================================================================== - def get_records(self, *args, **kw) -> dict: + def get_records(self, *args, **kw) -> List[Dict]: """ Returns records as a dictionary, where the keys are site ids and the values are site or parameter records. @@ -438,7 +432,7 @@ def intersects(self, wkt: str) -> bool: return True - def read(self, *args, **kw) -> List[SiteRecord]: + def read(self, *args, **kw) -> List[SiteRecord] | None: """ Returns a list of transformed site records. Calls self.get_records, which needs to be implemented for each source @@ -455,6 +449,7 @@ def read(self, *args, **kw) -> List[SiteRecord]: return self._transform_sites(records) else: self.warn("No site records returned") + return None def _transform_sites(self, records: list) -> List[SiteRecord]: """ @@ -480,7 +475,7 @@ def _transform_sites(self, records: list) -> List[SiteRecord]: self.log(f"processed nrecords={len(transformed_records)}") return transformed_records - def chunks(self, records: list, chunk_size: int = None) -> list: + def chunks(self, records: list, chunk_size: int | None = None) -> list: """ Returns a list of records split into lists of size chunk_size. If chunk_size less than 1 then the records are not split @@ -614,13 +609,20 @@ def _extract_latest_record(self, records: list) -> dict: return self._extract_terminal_record(records, bookend=LATEST) def read( - self, site_record: SiteRecord, use_summarize: bool, start_ind: int, end_ind: int - ) -> List[ - AnalyteRecord - | AnalyteSummaryRecord - | WaterLevelRecord - | WaterLevelSummaryRecord - ]: + self, + site_record: SiteRecord | list, + use_summarize: bool, + start_ind: int, + end_ind: int, + ) -> ( + List[ + AnalyteRecord + | AnalyteSummaryRecord + | WaterLevelRecord + | WaterLevelSummaryRecord + ] + | None + ): """ Returns a list of transformed parameter records. Transformed parameter records are standardized so that all of the records have the same format. They are @@ -773,6 +775,7 @@ def read( name = ",".join(names) self.warn(f"{name}: No records found") + return None # ========================================================================== # Methods Implemented in BaseAnalyteSource and BaseWaterLevelSource @@ -821,7 +824,7 @@ def _get_output_units(self) -> str: # Methods That Need to be Implemented For Each Source # ========================================================================== - def _extract_site_records(self, records: dict, site_record: dict) -> list: + def _extract_site_records(self, records: list[dict], site_record) -> list: """ Returns all records for a single site as a list of records (which are dictionaries). 
diff --git a/backend/transformer.py b/backend/transformer.py
index 229fc0c..cb3afe5 100644
--- a/backend/transformer.py
+++ b/backend/transformer.py
@@ -131,8 +131,8 @@ def convert_units(
     output_units: str,
     source_parameter_name: str,
     die_parameter_name: str,
-    dt: str = None,
-) -> tuple[float, float, str]:
+    dt: str | None = None,
+) -> tuple[float, float | None, str]:
     """
     Converts the following units for any parameter value:

@@ -198,7 +198,7 @@ def convert_units(
         the source_parameter_name (e.g. nitrate as n).
     """
     if die_parameter_name == "ph":
-        conversion_factor = 1
+        conversion_factor = 1.0
     elif output_units == mgl:
         if input_units in ["mg/l caco3", "mg/l caco3**"]:
             if die_parameter_name == "bicarbonate":
@@ -210,7 +210,7 @@ def convert_units(
         elif input_units == "mg/l as n":
             conversion_factor = 4.427
         elif input_units in ["mg/l asno3", "mg/l as no3"]:
-            conversion_factor = 1
+            conversion_factor = 1.0
         elif input_units == "ug/l as n":
             conversion_factor = 0.004427
         elif input_units == "pci/l":
@@ -220,22 +220,22 @@ def convert_units(
         elif input_units == tpaf:
             conversion_factor = 735.47
         elif input_units == ppm:
-            conversion_factor = 1
+            conversion_factor = 1.0
     elif input_units == output_units:
         if source_parameter_name in ["nitrate as n", "nitrate (as n)"]:
             conversion_factor = 4.427
         else:
-            conversion_factor = 1
+            conversion_factor = 1.0
     elif output_units == ft:
         if input_units in [m, "meters"]:
             conversion_factor = 3.28084
         elif input_units in [ft, "feet"]:
-            conversion_factor = 1
+            conversion_factor = 1.0
     elif output_units == m:
         if input_units in [ft, "feet"]:
             conversion_factor = 0.3048
         elif input_units in [m, "meters"]:
-            conversion_factor = 1
+            conversion_factor = 1.0

     if conversion_factor:
         return input_value * conversion_factor, conversion_factor, warning
@@ -331,13 +331,25 @@ class BaseTransformer(Loggable):
     """

     _cached_polygon = None
-    config = None
+    # config = None
     check_contained = True

     # ==========================================================================
     # Methods Already Implemented
     # ==========================================================================

+    def set_config(self, config):
+        """
+        Sets the config for the transformer. Called in BaseSource.set_config()
+        to set the config for both the source and the transformer.
+
+        Parameters
+        --------
+        config: Config
+            The config to set for the transformer
+        """
+        self.config = config
+
     def do_transform(
         self, inrecord: dict, *args, **kw
     ) -> (
@@ -347,6 +359,7 @@ def do_transform(
         | AnalyteSummaryRecord
         | WaterLevelSummaryRecord
         | SummaryRecord
+        | None
     ):
         """
         Transforms a record, site or parameter, into a standardized format.
@@ -393,55 +406,59 @@ def do_transform(
         """
         # _transform needs to be implemented by each SiteTransformer
         # _transform is already implemented in each ParameterTransformer
-        record = self._transform(inrecord, *args, **kw)
-        if not record:
-            return
+        transformed_record = self._transform(inrecord, *args, **kw)
+        if not transformed_record:
+            return None

         # ensure that a site or summary record is contained within the bounding polygon
-        if "longitude" in record and "latitude" in record:
-            if not self.contained(record["longitude"], record["latitude"]):
+        if "longitude" in transformed_record and "latitude" in transformed_record:
+            if not self.contained(
+                transformed_record["longitude"], transformed_record["latitude"]
+            ):
                 self.warn(
-                    f"Skipping site {record['id']}. It is not within the defined geographic bounds"
+                    f"Skipping site {transformed_record['id']}. 
It is not within the defined geographic bounds" ) - return + return None - self._post_transform(record, *args, **kw) + self._post_transform(transformed_record, *args, **kw) # standardize datetime - dt = record.get(DT_MEASURED) + dt = transformed_record.get(DT_MEASURED) if dt: - d, t = standardize_datetime(dt, record["id"]) - record["date_measured"] = d - record["time_measured"] = t + d, t = standardize_datetime(dt, transformed_record["id"]) + transformed_record["date_measured"] = d + transformed_record["time_measured"] = t else: - mrd = record.get("latest_datetime") + mrd = transformed_record.get("latest_datetime") if mrd: - d, t = standardize_datetime(mrd, record["id"]) - record["date_measured"] = d - record["time_measured"] = t + d, t = standardize_datetime(mrd, transformed_record["id"]) + transformed_record["date_measured"] = d + transformed_record["time_measured"] = t # convert to proper record type # a record klass holds the original record's data as a dictionary, and has methods to update the record's data and get the record's data klass = self._get_record_klass() - record = klass(record) + klassed_record = klass(transformed_record) # update the record's geographic information and well data if it is a SiteRecord or SummaryRecord # transforms the horizontal datum and lon/lat coordinates to WGS84 # transforms the elevation and well depth units to the output unit specified in the config # transforms the well depth and well depth units to the output unit specified in the config - if isinstance(record, (SiteRecord, SummaryRecord)): - y = float(record.latitude) - x = float(record.longitude) + if isinstance(klassed_record, (SiteRecord, SummaryRecord)): + y = float(klassed_record.latitude) + x = float(klassed_record.longitude) if x == 0 or y == 0: - self.warn(f"Skipping site {record.id}. Latitude or Longitude is 0") + self.warn( + f"Skipping site {klassed_record.id}. Latitude or Longitude is 0" + ) return None - input_horizontal_datum = record.horizontal_datum + input_horizontal_datum = klassed_record.horizontal_datum if input_horizontal_datum not in ALLOWED_DATUMS: self.warn( - f"Skipping site {record.id}. Datum {input_horizontal_datum} cannot be processed" + f"Skipping site {klassed_record.id}. Datum {input_horizontal_datum} cannot be processed" ) return None @@ -462,43 +479,43 @@ def do_transform( if not self.in_nm(lng, lat): self.warn( - f"Skipping site {record.id}. Coordinates {x}, {y} with datum {input_horizontal_datum} are not within 25km of New Mexico" + f"Skipping site {klassed_record.id}. 
Coordinates {x}, {y} with datum {input_horizontal_datum} are not within 25km of New Mexico" ) return None - record.update(latitude=lat) - record.update(longitude=lng) - record.update(horizontal_datum=datum) + klassed_record.update(latitude=lat) + klassed_record.update(longitude=lng) + klassed_record.update(horizontal_datum=datum) elevation, elevation_unit = transform_length_units( - record.elevation, - record.elevation_units, + klassed_record.elevation, + klassed_record.elevation_units, output_elevation_units, ) - record.update(elevation=elevation) - record.update(elevation_units=elevation_unit) + klassed_record.update(elevation=elevation) + klassed_record.update(elevation_units=elevation_unit) well_depth, well_depth_unit = transform_length_units( - record.well_depth, - record.well_depth_units, + klassed_record.well_depth, + klassed_record.well_depth_units, well_depth_units, ) - record.update(well_depth=well_depth) - record.update(well_depth_units=well_depth_unit) + klassed_record.update(well_depth=well_depth) + klassed_record.update(well_depth_units=well_depth_unit) # update the units to the output unit for analyte records # this is done after converting the units to the output unit for the analyte records # convert the parameter value to the output unit specified in the config - elif isinstance(record, (AnalyteRecord, WaterLevelRecord)): - if isinstance(record, AnalyteRecord): + elif isinstance(klassed_record, (AnalyteRecord, WaterLevelRecord)): + if isinstance(klassed_record, AnalyteRecord): output_units = self.config.analyte_output_units else: output_units = self.config.waterlevel_output_units - source_result = record.parameter_value - source_unit = record.source_parameter_units - dt = record.date_measured - source_name = record.source_parameter_name + source_result = klassed_record.parameter_value + source_unit = klassed_record.source_parameter_units + dt = klassed_record.date_measured + source_name = klassed_record.source_parameter_name conversion_factor = None # conversion factor will remain None if record is kept for time series and cannot be converted, such as non-detects warning_msg = "" try: @@ -511,24 +528,24 @@ def do_transform( dt, ) if warning_msg != "": - msg = f"{warning_msg} for {record.id}" + msg = f"{warning_msg} for {klassed_record.id}" self.warn(msg) except TypeError: - msg = f"Keeping {source_result} for {record.id} on {record.date_measured} for time series data" + msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result except ValueError: - msg = f"Keeping {source_result} for {record.id} on {record.date_measured} for time series data" + msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result if warning_msg == "": - record.update(conversion_factor=conversion_factor) - record.update(parameter_value=converted_result) + klassed_record.update(conversion_factor=conversion_factor) + klassed_record.update(parameter_value=converted_result) else: - record = None + klassed_record = None - return record + return klassed_record def in_nm(self, lng: float | int | str, lat: float | int | str) -> bool: """ @@ -667,7 +684,7 @@ def _get_record_klass(self): class SiteTransformer(BaseTransformer): - def _get_record_klass(self) -> SiteRecord: + def _get_record_klass(self) -> type[SiteRecord]: """ Returns the SiteRecord class to use for the transformer for all site records @@ -786,7 
+803,9 @@ def _transform_latest_record(self, record, site_id): class WaterLevelTransformer(ParameterTransformer): - def _get_record_klass(self) -> WaterLevelRecord | WaterLevelSummaryRecord: + def _get_record_klass( + self, + ) -> type[WaterLevelRecord] | type[WaterLevelSummaryRecord]: """ Returns the WaterLevelRecord class to use for the transformer for water level records if config.output_summary is False, otherwise @@ -815,7 +834,7 @@ def _get_parameter_name_and_units(self) -> tuple: class AnalyteTransformer(ParameterTransformer): - def _get_record_klass(self) -> AnalyteRecord | AnalyteSummaryRecord: + def _get_record_klass(self) -> type[AnalyteRecord] | type[AnalyteSummaryRecord]: """ Returns the AnalyteRecord class to use for the transformer for water level records if config.output_summary is False, otherwise diff --git a/backend/unifier.py b/backend/unifier.py index 524f695..ce03a3d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -17,12 +17,12 @@ from backend.config import Config, get_source from backend.constants import WATERLEVELS -from backend.logging import setup_logging +from backend.logger import setup_logging from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister from backend.source import BaseSiteSource -def health_check(source: BaseSiteSource) -> bool: +def health_check(source: BaseSiteSource) -> bool | None: """ Determines if data can be returned from the source (if it is healthy) @@ -39,6 +39,8 @@ def health_check(source: BaseSiteSource) -> bool: source = get_source(source) if source: return bool(source.health()) + else: + return None def unify_analytes(config): @@ -135,19 +137,16 @@ def _site_wrapper(site_source, parameter_source, persister, config): return sites_with_records_count = 0 - start_ind = 1 + start_ind = 0 end_ind = 0 first_flag = True if config.sites_only: persister.sites.extend(sites) else: - for sites in site_source.chunks(sites): - if site_limit and sites_with_records_count == site_limit: - break - - if type(sites) == list: - n = len(sites) + for site_records in site_source.chunks(sites): + if type(site_records) == list: + n = len(site_records) if first_flag: first_flag = False else: @@ -157,25 +156,64 @@ def _site_wrapper(site_source, parameter_source, persister, config): if use_summarize: summary_records = parameter_source.read( - sites, use_summarize, start_ind, end_ind + site_records, use_summarize, start_ind, end_ind ) if summary_records: persister.records.extend(summary_records) + sites_with_records_count += len(summary_records) + else: + continue else: results = parameter_source.read( - sites, use_summarize, start_ind, end_ind + site_records, use_summarize, start_ind, end_ind ) # no records are returned if there is no site record for parameter # or if the record isn't clean (doesn't have the correct fields) # don't count these sites to apply to site_limit if results is None or len(results) == 0: continue + else: + sites_with_records_count += len(results) for site, records in results: persister.timeseries.append((site, records)) persister.sites.append(site) - sites_with_records_count += 1 + if site_limit: + # print( + # "sites_with_records_count:", + # sites_with_records_count, + # "|", + # "site_limit:", + # site_limit, + # "|", + # "chunk_size:", + # site_source.chunk_size, + # ) + + if sites_with_records_count >= site_limit: + # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded
+                        num_sites_to_remove = sites_with_records_count - site_limit
+                        # print(
+                        #     f"removing {num_sites_to_remove} to avoid exceeding the site limit"
+                        # )
+
+                        # if sites_with_records_count == site_limit then num_sites_to_remove = 0
+                        # and calling list[:-0] (i.e. list[:0]) would return an empty list, so subtract
+                        # num_sites_to_remove from the length of the list
+                        # to remove the last num_sites_to_remove sites
+                        if use_summarize:
+                            persister.records = persister.records[
+                                : len(persister.records) - num_sites_to_remove
+                            ]
+                        else:
+                            persister.timeseries = persister.timeseries[
+                                : len(persister.timeseries) - num_sites_to_remove
+                            ]
+                            persister.sites = persister.sites[
+                                : len(persister.sites) - num_sites_to_remove
+                            ]
+                        break
     except BaseException:
         import traceback
diff --git a/frontend/cli.py b/frontend/cli.py
index 425fdfa..04e2949 100644
--- a/frontend/cli.py
+++ b/frontend/cli.py
@@ -130,6 +130,11 @@ def cli():
         default="",
         help="New Mexico county name",
     ),
+    click.option(
+        "--wkt",
+        default="",
+        help="Well known text (WKT) representation of a geometry. For example, 'POLYGON((x1 y1, x2 y2, x3 y3, x1 y1))'",
+    ),
 ]
 DEBUG_OPTIONS = [
     click.option(
@@ -210,7 +215,7 @@ def _add_options(func):

 @cli.command()
 @click.argument(
-    "weave",
+    "parameter",
     type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False),
     required=True,
 )
@@ -221,12 +226,13 @@ def _add_options(func):
 @add_options(ALL_SOURCE_OPTIONS)
 @add_options(DEBUG_OPTIONS)
 def weave(
-    weave,
+    parameter,
     output,
     output_dir,
     start_date,
     end_date,
     bbox,
+    wkt,
     county,
     no_bernco,
     no_bor,
@@ -247,9 +253,8 @@ def weave(
     """
     Get parameter timeseries or summary data
     """
-    parameter = weave
     # instantiate config and set up parameter
-    config = setup_config(f"{parameter}", bbox, county, site_limit, dry)
+    config = setup_config(parameter, bbox, wkt, county, site_limit, dry)
     config.parameter = parameter

     # output type
@@ -289,16 +294,16 @@ def weave(

     # setup logging here so that the path can be set to config.output_path
     setup_logging(path=config.output_path)

+    config.report()
     if not dry:
-        config.report()
         # prompt user to continue
         if not click.confirm("Do you want to continue?", default=True):
             return
-
-    if parameter.lower() == "waterlevels":
-        unify_waterlevels(config)
-    else:
-        unify_analytes(config)
+        if parameter.lower() == "waterlevels":
+            unify_waterlevels(config)
+        else:
+            unify_analytes(config)
+    return config


 @cli.command()
@@ -308,6 +313,7 @@ def weave(
 def wells(
     bbox,
+    wkt,
     county,
     output_dir,
     no_bernco,
@@ -330,7 +336,7 @@ def wells(
     """
     Get locations
     """

-    config = setup_config("sites", bbox, county, site_limit, dry)
+    config = setup_config("sites", bbox, wkt, county, site_limit, dry)
     config_agencies = [
         "bernco",
         "bor",
@@ -370,7 +376,7 @@ def wells(
     required=True,
 )
 @add_options(SPATIAL_OPTIONS)
-def sources(sources, bbox, county):
+def sources(sources, bbox, wkt, county):
     """
     List available sources
     """
@@ -381,6 +387,8 @@ def sources(sources, bbox, county):
         config.county = county
     elif bbox:
         config.bbox = bbox
+    elif wkt:
+        config.wkt = wkt

     parameter = sources
     config.parameter = parameter
@@ -394,7 +402,7 @@ def sources(sources, bbox, county):
         click.echo(s)


-def setup_config(tag, bbox, county, site_limit, dry):
+def setup_config(tag, bbox, wkt, county, site_limit, dry):
     config = Config()
     if county:
         click.echo(f"Getting {tag} for county {county}")
@@ -403,8 +411,14 @@ def setup_config(tag, bbox, county, site_limit, dry):
         click.echo(f"Getting {tag} for bounding box {bbox}")
         # bbox = 
-105.396826 36.219290, -106.024162 35.384307 config.bbox = bbox + elif wkt: + click.echo(f"Getting {tag} for WKT {wkt}") + config.wkt = wkt - config.site_limit = site_limit + if site_limit: + config.site_limit = int(site_limit) + else: + config.site_limit = None config.dry = dry return config diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..380b366 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +ignore_missing_imports = True +exclude = ^(venv|.github|.mypy_cache|.pytest_cache|nmuwd.egg-info|__pycache__|build|tests/archived) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..8ea4712 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +; skip archived tests but keep for reference +norecursedirs = tests/archived \ No newline at end of file diff --git a/setup.py b/setup.py index 9f43e06..f06990d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.8.1", + version="0.8.2", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..bcf9e80 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,8 @@ +def recursively_clean_directory(path): + """Recursively delete all files and directories in the given path.""" + for item in path.iterdir(): + if item.is_dir(): + recursively_clean_directory(item) + else: + item.unlink() + path.rmdir() diff --git a/tests/archived/__init__.py b/tests/archived/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cli.py b/tests/archived/test_cli.py similarity index 55% rename from tests/test_cli.py rename to tests/archived/test_cli.py index 3d65365..3e53924 100644 --- a/tests/test_cli.py +++ b/tests/archived/test_cli.py @@ -218,190 +218,189 @@ def test_waterlevels_invalid_end(): _tester(waterlevels, args, fail=True) -# -# def _tester(source, func, county, bbox, args=None): -# runner = CliRunner() -# -# nosources = [ -# f -# for f in ( -# "--no-amp", -# "--no-nwis", -# "--no-st2", -# "--no-bor", -# "--no-dwb", -# "--no-wqp", -# "--no-isc-seven-rivers", -# "--no-ckan", -# ) -# if f != f"--no-{source}" -# ] -# -# dargs = nosources + ["--site-limit", 10] -# -# if args: -# args += dargs -# else: -# args = dargs -# -# if county: -# args.extend(("--county", county)) -# elif bbox: -# args.extend(("--bbox", bbox)) -# -# print(" ".join([str(f) for f in args])) -# result = runner.invoke(func, args) -# -# return result +def _tester(source, func, county, bbox, args=None): + runner = CliRunner() + nosources = [ + f + for f in ( + "--no-amp", + "--no-nwis", + "--no-st2", + "--no-bor", + "--no-dwb", + "--no-wqp", + "--no-isc-seven-rivers", + "--no-ckan", + ) + if f != f"--no-{source}" + ] + + dargs = nosources + ["--site-limit", 10] + + if args: + args += dargs + else: + args = dargs + + if county: + args.extend(("--county", county)) + elif bbox: + args.extend(("--bbox", bbox)) + + print(" ".join([str(f) for f in args])) + result = runner.invoke(func, args) + + return result + + +def _summary_tester(source, func, county=None, bbox=None, args=None): + if not (county or bbox): + county = "eddy" + + runner = CliRunner() + # with runner.isolated_filesystem(): + # result = _tester(source, func, county, bbox, args) + # assert result.exit_code == 0 + # assert os.path.isfile("output.csv") + + +def _timeseries_tester( + source, + func, + combined_flag=True, + timeseries_flag=True, + county=None, + bbox=None, + args=None, +): + 
if args is None: + args = [] + # runner = CliRunner() + # with runner.isolated_filesystem(): + # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) + # assert result.exit_code == 0 + # print("combined", os.path.isfile("output.combined.csv"), combined_flag) + # assert os.path.isfile("output.combined.csv") == combined_flag + # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) + # assert os.path.isdir("output_timeseries") == timeseries_flag + + +# ====== Analyte Tests ======================================================= +def _analyte_summary_tester(key): + _summary_tester(key, analytes, args=["TDS"]) + + +def _analyte_county_tester(source, **kw): + _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) + + +def test_unify_analytes_amp(): + _analyte_county_tester("amp", timeseries_flag=False) + + +def test_unify_analytes_wqp(): + _analyte_county_tester("wqp") + + +def test_unify_analytes_bor(): + _analyte_county_tester("bor", combined_flag=False) + + +def test_unify_analytes_isc_seven_rivers(): + _analyte_county_tester("isc-seven-rivers") + + +def test_unify_analytes_dwb(): + _analyte_county_tester("dwb", timeseries_flag=False) + + +def test_unify_analytes_wqp_summary(): + _analyte_summary_tester("wqp") + + +def test_unify_analytes_bor_summary(): + _analyte_summary_tester("bor") -# def _summary_tester(source, func, county=None, bbox=None, args=None): -# if not (county or bbox): -# county = "eddy" -# -# runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args) -# # assert result.exit_code == 0 -# # assert os.path.isfile("output.csv") -# -# -# def _timeseries_tester( -# source, -# func, -# combined_flag=True, -# timeseries_flag=True, -# county=None, -# bbox=None, -# args=None, -# ): -# if args is None: -# args = [] -# # runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) -# # assert result.exit_code == 0 -# # print("combined", os.path.isfile("output.combined.csv"), combined_flag) -# # assert os.path.isfile("output.combined.csv") == combined_flag -# # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) -# # assert os.path.isdir("output_timeseries") == timeseries_flag -# -# -# # ====== Analyte Tests ======================================================= -# def _analyte_summary_tester(key): -# _summary_tester(key, analytes, args=["TDS"]) -# -# -# def _analyte_county_tester(source, **kw): -# _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) -# -# -# def test_unify_analytes_amp(): -# _analyte_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp(): -# _analyte_county_tester("wqp") -# -# -# def test_unify_analytes_bor(): -# _analyte_county_tester("bor", combined_flag=False) -# -# -# def test_unify_analytes_isc_seven_rivers(): -# _analyte_county_tester("isc-seven-rivers") -# -# -# def test_unify_analytes_dwb(): -# _analyte_county_tester("dwb", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp_summary(): -# _analyte_summary_tester("wqp") -# -# -# def test_unify_analytes_bor_summary(): -# _analyte_summary_tester("bor") -# -# -# def test_unify_analytes_amp_summary(): -# _analyte_summary_tester("amp") -# -# -# def test_unify_analytes_dwb_summary(): -# _analyte_summary_tester("dwb") -# -# -# def test_unify_analytes_isc_seven_rivers_summary(): -# _analyte_summary_tester("isc-seven-rivers") + +def 
test_unify_analytes_amp_summary(): + _analyte_summary_tester("amp") + + +def test_unify_analytes_dwb_summary(): + _analyte_summary_tester("dwb") + + +def test_unify_analytes_isc_seven_rivers_summary(): + _analyte_summary_tester("isc-seven-rivers") # ====== End Analyte Tests ======================================================= # ====== Water Level Tests ======================================================= -# def _waterlevel_county_tester(source, **kw): -# _timeseries_tester(source, waterlevels, county="eddy", **kw) -# -# -# def _waterlevel_bbox_tester(source, **kw): -# _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) +def _waterlevel_county_tester(source, **kw): + _timeseries_tester(source, waterlevels, county="eddy", **kw) -# -# def test_unify_waterlevels_nwis(): -# _waterlevel_county_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp(): -# _waterlevel_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_waterlevels_st2(): -# _waterlevel_county_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers(): -# _waterlevel_county_tester("isc-seven-rivers") -# -# -# def test_unify_waterlevels_ckan(): -# _waterlevel_county_tester("ckan") -# -# -# def test_unify_waterlevels_nwis_summary(): -# _summary_tester("nwis", waterlevels) -# -# -# def test_unify_waterlevels_amp_summary(): -# _summary_tester("amp", waterlevels) -# -# -# def test_unify_waterlevels_st2_summary(): -# _summary_tester("st2", waterlevels) -# -# -# def test_unify_waterlevels_isc_seven_rivers_summary(): -# _summary_tester("isc-seven-rivers", waterlevels) -# -# -# def test_unify_waterlevels_nwis_bbox(): -# _waterlevel_bbox_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp_bbox(): -# _waterlevel_bbox_tester("amp") -# -# -# def test_unify_waterlevels_st2_bbox(): -# _waterlevel_bbox_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers_bbox(): -# _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) -# -# -# def test_unify_waterlevels_ckan_bbox(): -# _waterlevel_bbox_tester("ckan") + +def _waterlevel_bbox_tester(source, **kw): + _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) + + +def test_unify_waterlevels_nwis(): + _waterlevel_county_tester("nwis", timeseries_flag=False) + + +def test_unify_waterlevels_amp(): + _waterlevel_county_tester("amp", timeseries_flag=False) + + +def test_unify_waterlevels_st2(): + _waterlevel_county_tester("st2", combined_flag=False) + + +def test_unify_waterlevels_isc_seven_rivers(): + _waterlevel_county_tester("isc-seven-rivers") + + +def test_unify_waterlevels_ckan(): + _waterlevel_county_tester("ckan") + + +def test_unify_waterlevels_nwis_summary(): + _summary_tester("nwis", waterlevels) + + +def test_unify_waterlevels_amp_summary(): + _summary_tester("amp", waterlevels) + + +def test_unify_waterlevels_st2_summary(): + _summary_tester("st2", waterlevels) + + +def test_unify_waterlevels_isc_seven_rivers_summary(): + _summary_tester("isc-seven-rivers", waterlevels) + + +def test_unify_waterlevels_nwis_bbox(): + _waterlevel_bbox_tester("nwis", timeseries_flag=False) + + +def test_unify_waterlevels_amp_bbox(): + _waterlevel_bbox_tester("amp") + + +def test_unify_waterlevels_st2_bbox(): + _waterlevel_bbox_tester("st2", combined_flag=False) + + +def test_unify_waterlevels_isc_seven_rivers_bbox(): + _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) + + +def test_unify_waterlevels_ckan_bbox(): + 
_waterlevel_bbox_tester("ckan") # ====== End Water Level Tests ======================================================= diff --git a/tests/test_unifier.py b/tests/archived/test_unifier.py similarity index 100% rename from tests/test_unifier.py rename to tests/archived/test_unifier.py diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py new file mode 100644 index 0000000..84923b8 --- /dev/null +++ b/tests/test_cli/__init__.py @@ -0,0 +1,239 @@ +from click.testing import CliRunner +from logging import shutdown as logger_shutdown +from pathlib import Path +import pytest +from typing import List + +from backend.config import SOURCE_KEYS +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from frontend.cli import weave +from tests import recursively_clean_directory + + +class BaseCLITestClass: + + runner: CliRunner + agency: str + agency_reports_parameter: dict + output_dir: Path + + @pytest.fixture(autouse=True) + def setup(self): + # SETUP CODE ----------------------------------------------------------- + self.runner = CliRunner() + + # RUN TESTS ------------------------------------------------------------ + yield + + # TEARDOWN CODE --------------------------------------------------------- + logger_shutdown() + recursively_clean_directory(self.output_dir) + + def _test_weave( + self, + parameter: str, + output: str, + site_limit: int = 4, + start_date: str = "1990-08-10", + end_date: str = "1990-08-11", + bbox: str | None = None, + county: str | None = None, + wkt: str | None = None, + ): + # Arrange + # turn off all sources except for the one being tested + no_agencies = [] + for source in SOURCE_KEYS: + source_with_dash = source.replace("_", "-") + if source_with_dash == self.agency: + continue + else: + no_agencies.append(f"--no-{source_with_dash}") + + geographic_filter_name: str | None = None + geographic_filter_value: str | None = None + if bbox: + geographic_filter_name = "bbox" + geographic_filter_value = bbox + elif county: + geographic_filter_name = "county" + geographic_filter_value = county + elif wkt: + geographic_filter_name = "wkt" + geographic_filter_value = wkt + + arguments = [ + parameter, + "--output", + output, + "--dry", + "--site-limit", + str(site_limit), + "--start-date", + start_date, + "--end-date", + end_date, + ] + + if geographic_filter_name and geographic_filter_value: + arguments.extend([f"--{geographic_filter_name}", geographic_filter_value]) + + arguments.extend(no_agencies) + + # Act + result = self.runner.invoke(weave, arguments, standalone_mode=False) + + # Assert + assert result.exit_code == 0 + + """ + For the config, check that + + 0. (set output dir to clean up tests results even in event of failure) + 1. The parameter is set correctly + 2. The agencies are set correctly + 3. The output types are set correctly + 4. The site limit is set correctly + 5. The dry is set correctly + 6. The start date is set correctly + 7. The end date is set correctly + 8. 
The geographic filter is set correctly + """ + config = result.return_value + + # 0 + self.output_dir = Path(config.output_path) + + # 1 + assert getattr(config, "parameter") == parameter + + # 2 + agency_with_underscore = self.agency.replace("-", "_") + if self.agency_reports_parameter[parameter]: + assert getattr(config, f"use_source_{agency_with_underscore}") is True + else: + assert getattr(config, f"use_source_{agency_with_underscore}") is False + + for no_agency in no_agencies: + no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_") + assert getattr(config, f"use_source_{no_agency_with_underscore}") is False + + # 3 + output_types = ["summary", "timeseries_unified", "timeseries_separated"] + for output_type in output_types: + if output_type == output: + assert getattr(config, f"output_{output_type}") is True + else: + assert getattr(config, f"output_{output_type}") is False + + # 4 + assert getattr(config, "site_limit") == 4 + + # 5 + assert getattr(config, "dry") is True + + # 6 + assert getattr(config, "start_date") == start_date + + # 7 + assert getattr(config, "end_date") == end_date + + # 8 + if geographic_filter_name and geographic_filter_value: + for _geographic_filter_name in ["bbox", "county", "wkt"]: + if _geographic_filter_name == geographic_filter_name: + assert ( + getattr(config, _geographic_filter_name) + == geographic_filter_value + ) + else: + assert getattr(config, _geographic_filter_name) == "" + + def test_weave_summary(self): + self._test_weave(parameter=WATERLEVELS, output="summary") + + def test_weave_timeseries_unified(self): + self._test_weave(parameter=WATERLEVELS, output="timeseries_unified") + + def test_weave_timeseries_separated(self): + self._test_weave(parameter=WATERLEVELS, output="timeseries_separated") + + def test_weave_bbox(self): + self._test_weave( + parameter=WATERLEVELS, output="summary", bbox="32.0,-106.0,36.0,-102.0" + ) + + def test_weave_county(self): + self._test_weave(parameter=WATERLEVELS, output="summary", county="Bernalillo") + + def test_weave_wkt(self): + self._test_weave( + parameter=WATERLEVELS, + output="summary", + wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))", + ) + + def test_weave_waterlevels(self): + self._test_weave(parameter=WATERLEVELS, output="summary") + + def test_weave_arsenic(self): + self._test_weave(parameter=ARSENIC, output="summary") + + def test_weave_bicarbonate(self): + self._test_weave(parameter=BICARBONATE, output="summary") + + def test_weave_calcium(self): + self._test_weave(parameter=CALCIUM, output="summary") + + def test_weave_carbonate(self): + self._test_weave(parameter=CARBONATE, output="summary") + + def test_weave_chloride(self): + self._test_weave(parameter=CHLORIDE, output="summary") + + def test_weave_fluoride(self): + self._test_weave(parameter=FLUORIDE, output="summary") + + def test_weave_magnesium(self): + self._test_weave(parameter=MAGNESIUM, output="summary") + + def test_weave_nitrate(self): + self._test_weave(parameter=NITRATE, output="summary") + + def test_weave_ph(self): + self._test_weave(parameter=PH, output="summary") + + def test_weave_potassium(self): + self._test_weave(parameter=POTASSIUM, output="summary") + + def test_weave_silica(self): + self._test_weave(parameter=SILICA, output="summary") + + def test_weave_sodium(self): + self._test_weave(parameter=SODIUM, output="summary") + + def test_weave_sulfate(self): + self._test_weave(parameter=SULFATE, output="summary") + + def test_weave_tds(self): + 
self._test_weave(parameter=TDS, output="summary") + + def test_weave_uranium(self): + self._test_weave(parameter=URANIUM, output="summary") diff --git a/tests/test_cli/test_bernco.py b/tests/test_cli/test_bernco.py new file mode 100644 index 0000000..331ed26 --- /dev/null +++ b/tests/test_cli/test_bernco.py @@ -0,0 +1,42 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestBernCoCLI(BaseCLITestClass): + + agency = "bernco" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } diff --git a/tests/test_cli/test_cabq.py b/tests/test_cli/test_cabq.py new file mode 100644 index 0000000..1748975 --- /dev/null +++ b/tests/test_cli/test_cabq.py @@ -0,0 +1,42 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestCABQCLI(BaseCLITestClass): + + agency = "cabq" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } diff --git a/tests/test_cli/test_ebid.py b/tests/test_cli/test_ebid.py new file mode 100644 index 0000000..76429f1 --- /dev/null +++ b/tests/test_cli/test_ebid.py @@ -0,0 +1,42 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestEBIDCLI(BaseCLITestClass): + + agency = "ebid" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } diff --git a/tests/test_cli/test_nmbgmr_amp.py b/tests/test_cli/test_nmbgmr_amp.py new file mode 100644 index 0000000..df4ea49 --- /dev/null +++ b/tests/test_cli/test_nmbgmr_amp.py @@ -0,0 +1,42 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestNMBGMRCLI(BaseCLITestClass): + + agency = "nmbgmr-amp" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: True, + BICARBONATE: True, + CALCIUM: True, + CARBONATE: True, + CHLORIDE: True, + FLUORIDE: True, + MAGNESIUM: True, + NITRATE: True, + PH: True, + POTASSIUM: True, + SILICA: True, + SODIUM: True, + SULFATE: True, + TDS: True, + URANIUM: True, + } diff --git a/tests/test_cli/test_nmed_dwb.py 
b/tests/test_cli/test_nmed_dwb.py
new file mode 100644
index 0000000..edd9d68
--- /dev/null
+++ b/tests/test_cli/test_nmed_dwb.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestNMEDDWBCLI(BaseCLITestClass):
+
+    agency = "nmed-dwb"
+    agency_reports_parameter = {
+        WATERLEVELS: False,
+        ARSENIC: True,
+        BICARBONATE: True,
+        CALCIUM: True,
+        CARBONATE: False,
+        CHLORIDE: True,
+        FLUORIDE: True,
+        MAGNESIUM: True,
+        NITRATE: True,
+        PH: True,
+        POTASSIUM: True,
+        SILICA: True,
+        SODIUM: True,
+        SULFATE: True,
+        TDS: True,
+        URANIUM: True,
+    }
diff --git a/tests/test_cli/test_nmose_isc_seven_rivers.py b/tests/test_cli/test_nmose_isc_seven_rivers.py
new file mode 100644
index 0000000..0f99e70
--- /dev/null
+++ b/tests/test_cli/test_nmose_isc_seven_rivers.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestNMOSEISCSevenRiversCLI(BaseCLITestClass):
+
+    agency = "nmose-isc-seven-rivers"
+    agency_reports_parameter = {
+        WATERLEVELS: True,
+        ARSENIC: False,
+        BICARBONATE: True,
+        CALCIUM: True,
+        CARBONATE: False,
+        CHLORIDE: True,
+        FLUORIDE: True,
+        MAGNESIUM: True,
+        NITRATE: True,
+        PH: True,
+        POTASSIUM: True,
+        SILICA: True,
+        SODIUM: True,
+        SULFATE: True,
+        TDS: True,
+        URANIUM: False,
+    }
diff --git a/tests/test_cli/test_nmose_roswell.py b/tests/test_cli/test_nmose_roswell.py
new file mode 100644
index 0000000..0c2be39
--- /dev/null
+++ b/tests/test_cli/test_nmose_roswell.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestNMOSERoswellCLI(BaseCLITestClass):
+
+    agency = "nmose-roswell"
+    agency_reports_parameter = {
+        WATERLEVELS: True,
+        ARSENIC: False,
+        BICARBONATE: False,
+        CALCIUM: False,
+        CARBONATE: False,
+        CHLORIDE: False,
+        FLUORIDE: False,
+        MAGNESIUM: False,
+        NITRATE: False,
+        PH: False,
+        POTASSIUM: False,
+        SILICA: False,
+        SODIUM: False,
+        SULFATE: False,
+        TDS: False,
+        URANIUM: False,
+    }
diff --git a/tests/test_cli/test_nwis.py b/tests/test_cli/test_nwis.py
new file mode 100644
index 0000000..0fd236a
--- /dev/null
+++ b/tests/test_cli/test_nwis.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestNWISCLI(BaseCLITestClass):
+
+    agency = "nwis"
+    agency_reports_parameter = {
+        WATERLEVELS: True,
+        ARSENIC: False,
+        BICARBONATE: False,
+        CALCIUM: False,
+        CARBONATE: False,
+        CHLORIDE: False,
+        FLUORIDE: False,
+        MAGNESIUM: False,
+        NITRATE: False,
+        PH: False,
+        POTASSIUM: False,
+        SILICA: False,
+        SODIUM: False,
+        SULFATE: False,
+        TDS: False,
+        URANIUM: False,
+    }
diff --git a/tests/test_cli/test_pvacd.py b/tests/test_cli/test_pvacd.py
new file mode 100644
index 0000000..041c9a9
--- /dev/null
+++ b/tests/test_cli/test_pvacd.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestPVACDCLI(BaseCLITestClass):
+
+    agency = "pvacd"
+    agency_reports_parameter = {
+        WATERLEVELS: True,
+        ARSENIC: False,
+        BICARBONATE: False,
+        CALCIUM: False,
+        CARBONATE: False,
+        CHLORIDE: False,
+        FLUORIDE: False,
+        MAGNESIUM: False,
+        NITRATE: False,
+        PH: False,
+        POTASSIUM: False,
+        SILICA: False,
+        SODIUM: False,
+        SULFATE: False,
+        TDS: False,
+        URANIUM: False,
+    }
diff --git a/tests/test_cli/test_wqp.py b/tests/test_cli/test_wqp.py
new file mode 100644
index 0000000..f3beb7b
--- /dev/null
+++ b/tests/test_cli/test_wqp.py
@@ -0,0 +1,42 @@
+from backend.constants import (
+    WATERLEVELS,
+    ARSENIC,
+    BICARBONATE,
+    CALCIUM,
+    CARBONATE,
+    CHLORIDE,
+    FLUORIDE,
+    MAGNESIUM,
+    NITRATE,
+    PH,
+    POTASSIUM,
+    SILICA,
+    SODIUM,
+    SULFATE,
+    TDS,
+    URANIUM,
+)
+from tests.test_cli import BaseCLITestClass
+
+
+class TestWQPCLI(BaseCLITestClass):
+
+    agency = "wqp"
+    agency_reports_parameter = {
+        WATERLEVELS: True,
+        ARSENIC: True,
+        BICARBONATE: True,
+        CALCIUM: True,
+        CARBONATE: True,
+        CHLORIDE: True,
+        FLUORIDE: True,
+        MAGNESIUM: True,
+        NITRATE: True,
+        PH: True,
+        POTASSIUM: True,
+        SILICA: True,
+        SODIUM: True,
+        SULFATE: True,
+        TDS: True,
+        URANIUM: True,
+    }
diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py
new file mode 100644
index 0000000..a18dd94
--- /dev/null
+++ b/tests/test_sources/__init__.py
@@ -0,0 +1,178 @@
+from logging import shutdown as logger_shutdown
+from pathlib import Path
+import pytest
+from shapely import Geometry
+
+from backend.config import Config, SOURCE_KEYS
+from backend.constants import WATERLEVELS
+from backend.logger import setup_logging
+from backend.record import SummaryRecord, SiteRecord, ParameterRecord
+from backend.unifier import unify_analytes, unify_waterlevels
+from tests import recursively_clean_directory
+
+SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys)
+SITE_RECORD_HEADERS = list(SiteRecord.keys)
+PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys)
+
+
+class BaseSourceTestClass:
+    parameter: str
+    units: str
+    agency: str
+    bounds: Geometry
+
+    # set site_limit for tests
+    site_limit: int = 3
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        # SETUP CODE ----------------------------------------------------------
+        # 1: setup test/config attributes
+        self.config = Config()
+        for agency in SOURCE_KEYS:
+            setattr(self.config, f"use_source_{agency}", False)
+        setattr(self.config, "site_limit", self.site_limit)
+        setattr(self.config, "parameter", self.parameter)
+        setattr(self.config, "units", self.units)
+        setattr(self.config, f"use_source_{self.agency}", True)
+        self.config.finalize()
+
+        # 2: initiate logger
+        setup_logging(path=self.config.output_path)
+
+        # RUN TESTS ------------------------------------------------------------
+        yield
+
+        # UNIVERSAL ASSERTIONS -------------------------------------------------
+        # 1: log file exists
+        log_path = Path(self.config.output_path) / "die.log"
+        assert log_path.exists()
+
+        # TEARDOWN CODE --------------------------------------------------------
+        # 1: close logger to delete log file
+        logger_shutdown()
+
+        # 2: delete newly created dirs and files
+        path_to_clean = Path(self.config.output_path)
+        print(f"Cleaning and removing {path_to_clean}")
+        recursively_clean_directory(path_to_clean)
+
+        # reset test attributes
+        self.dirs_to_delete = []
+        self.config = None
+        self.unifier = None
+
+    def _run_unifier(self):
+        if self.parameter == WATERLEVELS:
+            unify_waterlevels(self.config)
+        else:
+            unify_analytes(self.config)
+
+    def _check_sites_file(self):
+        sites_file = Path(self.config.output_path) / "sites.csv"
+        assert sites_file.exists()
+
+        with open(sites_file, "r") as f:
+            headers = f.readline().strip().split(",")
+            assert headers == SITE_RECORD_HEADERS
+
+        # +1 for the header
+        with open(sites_file, "r") as f:
+            lines = f.readlines()
+            assert len(lines) == self.site_limit + 1
+
+    def _check_timeseries_file(self, timeseries_dir, timeseries_file_name):
+        timeseries_file = Path(timeseries_dir) / timeseries_file_name
+        assert timeseries_file.exists()
+
+        with open(timeseries_file, "r") as f:
+            headers = f.readline().strip().split(",")
+            assert headers == PARAMETER_RECORD_HEADERS
+
+    def test_health(self):
+        # do a health check for the agency
+        source = self.config.all_site_sources()[0][0]
+        assert source.health()
+
+    def test_summary(self):
+        # Arrange --------------------------------------------------------------
+        self.config.output_summary = True
+        self.config.report()
+
+        # Act ------------------------------------------------------------------
+        self._run_unifier()
+
+        # Assert ---------------------------------------------------------------
+        # Check the summary file
+        summary_file = Path(self.config.output_path) / "summary.csv"
+        assert summary_file.exists()
+
+        # Check the column headers
+        with open(summary_file, "r") as f:
+            headers = f.readline().strip().split(",")
+            assert headers == SUMMARY_RECORD_HEADERS
+
+        # +1 for the header
+        with open(summary_file, "r") as f:
+            lines = f.readlines()
+            assert len(lines) == self.site_limit + 1
+
+    def test_timeseries_unified(self):
+        # Arrange --------------------------------------------------------------
+        self.config.output_timeseries_unified = True
+        self.config.report()
+
+        # Act ------------------------------------------------------------------
+        self._run_unifier()
+
+        # Assert ---------------------------------------------------------------
+        # Check the sites file
+        self._check_sites_file()
+
+        # Check the timeseries file
+        timeseries_dir = Path(self.config.output_path)
+        timeseries_file_name = "timeseries_unified.csv"
+        self._check_timeseries_file(timeseries_dir, timeseries_file_name)
+
+    def test_timeseries_separated(self):
+        # Arrange --------------------------------------------------------------
+        self.config.output_timeseries_separated = True
+        self.config.report()
+
+        # Act ------------------------------------------------------------------
+        self._run_unifier()
+
+        # Assert ---------------------------------------------------------------
+        # Check the sites file
+        self._check_sites_file()
+
+        # Check the timeseries files
+        timeseries_dir = Path(self.config.output_path) / "timeseries"
+        assert len([f for f in timeseries_dir.iterdir()]) == self.site_limit
+
+        for timeseries_file in timeseries_dir.iterdir():
+            self._check_timeseries_file(timeseries_dir, timeseries_file.name)
+
+    @pytest.mark.skip(reason="test_date_range not implemented yet")
+    def test_date_range(self):
+        pass
+
+    @pytest.mark.skip(reason="test_bounds not implemented yet")
+    def test_bounds(self):
+        pass
+
+    @pytest.mark.skip(reason="test_wkt not implemented yet")
+    def test_wkt(self):
+        pass
+
+    @pytest.mark.skip(reason="test_county not implemented yet")
+    def test_county(self):
+        pass
+
+    @pytest.mark.skip(reason="test_huc not implemented yet")
+    def test_huc(self):
+        pass
+
+    @pytest.mark.skip(reason="test_bbox not implemented yet")
+    def test_bbox(self):
+        pass
diff --git a/tests/test_sources/test_bernco.py b/tests/test_sources/test_bernco.py
new file mode 100644
index 0000000..48004a9
--- /dev/null
+++ b/tests/test_sources/test_bernco.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestBernCoWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "bernco"
diff --git a/tests/test_sources/test_bor.py b/tests/test_sources/test_bor.py
new file mode 100644
index 0000000..003391d
--- /dev/null
+++ b/tests/test_sources/test_bor.py
@@ -0,0 +1,9 @@
+from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestBoRAnalyte(BaseSourceTestClass):
+
+    parameter = CALCIUM
+    units = MILLIGRAMS_PER_LITER
+    agency = "bor"
diff --git a/tests/test_sources/test_cabq.py b/tests/test_sources/test_cabq.py
new file mode 100644
index 0000000..9f3ff3c
--- /dev/null
+++ b/tests/test_sources/test_cabq.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestCABQWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "cabq"
diff --git a/tests/test_sources/test_ebid.py b/tests/test_sources/test_ebid.py
new file mode 100644
index 0000000..6adfd6f
--- /dev/null
+++ b/tests/test_sources/test_ebid.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestEBIDWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "ebid"
diff --git a/tests/test_sources/test_nmbgmr_amp.py b/tests/test_sources/test_nmbgmr_amp.py
new file mode 100644
index 0000000..90bba2c
--- /dev/null
+++ b/tests/test_sources/test_nmbgmr_amp.py
@@ -0,0 +1,16 @@
+from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestNMBGMRWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "nmbgmr_amp"
+
+
+class TestNMBGMRAnalyte(BaseSourceTestClass):
+
+    parameter = CALCIUM
+    units = MILLIGRAMS_PER_LITER
+    agency = "nmbgmr_amp"
diff --git a/tests/test_sources/test_nmed_dwb.py b/tests/test_sources/test_nmed_dwb.py
new file mode 100644
index 0000000..2a27be3
--- /dev/null
+++ b/tests/test_sources/test_nmed_dwb.py
@@ -0,0 +1,9 @@
+from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestNMEDDWBAnalyte(BaseSourceTestClass):
+
+    parameter = CALCIUM
+    units = MILLIGRAMS_PER_LITER
+    agency = "nmed_dwb"
diff --git a/tests/test_sources/test_nmose_isc_seven_rivers.py b/tests/test_sources/test_nmose_isc_seven_rivers.py
new file mode 100644
index 0000000..55b345e
--- /dev/null
+++ b/tests/test_sources/test_nmose_isc_seven_rivers.py
@@ -0,0 +1,16 @@
+from backend.constants import WATERLEVELS, CALCIUM, FEET, MILLIGRAMS_PER_LITER
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestNMOSEISCSevenRiversWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "nmose_isc_seven_rivers"
+
+
+class TestNMOSEISCSevenRiversAnalyte(BaseSourceTestClass):
+
+    parameter = CALCIUM
+    units = MILLIGRAMS_PER_LITER
+    agency = "nmose_isc_seven_rivers"
diff --git a/tests/test_sources/test_nmose_roswell.py b/tests/test_sources/test_nmose_roswell.py
new file mode 100644
index 0000000..585090f
--- /dev/null
+++ b/tests/test_sources/test_nmose_roswell.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestNMOSERoswellWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "nmose_roswell"
diff --git a/tests/test_sources/test_nwis.py b/tests/test_sources/test_nwis.py
new file mode 100644
index 0000000..b7bf272
--- /dev/null
+++ b/tests/test_sources/test_nwis.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestNWISWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "nwis"
diff --git a/tests/test_sources/test_pvacd.py b/tests/test_sources/test_pvacd.py
new file mode 100644
index 0000000..715acf7
--- /dev/null
+++ b/tests/test_sources/test_pvacd.py
@@ -0,0 +1,9 @@
+from backend.constants import WATERLEVELS, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestPVACDWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "pvacd"
diff --git a/tests/test_sources/test_wqp.py b/tests/test_sources/test_wqp.py
new file mode 100644
index 0000000..4f8437e
--- /dev/null
+++ b/tests/test_sources/test_wqp.py
@@ -0,0 +1,16 @@
+from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET
+from tests.test_sources import BaseSourceTestClass
+
+
+class TestWQPWaterlevels(BaseSourceTestClass):
+
+    parameter = WATERLEVELS
+    units = FEET
+    agency = "wqp"
+
+
+class TestWQPAnalyte(BaseSourceTestClass):
+
+    parameter = CALCIUM
+    units = MILLIGRAMS_PER_LITER
+    agency = "wqp"
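
Editor's note: each CLI test above only declares an agency name and an agency_reports_parameter expectation matrix; BaseSourceTestClass is shown in full, but BaseCLITestClass, imported from tests.test_cli, is not part of this diff. The sketch below shows one plausible way such a base class could consume the matrix; run_cli is a hypothetical stand-in for the project's real CLI entry point, not its actual API.

# Hypothetical sketch only: BaseCLITestClass is not shown in this diff, and
# run_cli is an assumed placeholder for whatever actually invokes the CLI.
def run_cli(agency: str, parameter: str) -> bool:
    """Assumed helper: True if the CLI yields records for (agency, parameter)."""
    raise NotImplementedError


class BaseCLITestClass:
    agency: str
    agency_reports_parameter: dict

    def test_reported_parameters(self):
        # Compare the CLI's observed behavior against the declared matrix,
        # one parameter at a time.
        for parameter, expected in self.agency_reports_parameter.items():
            assert run_cli(self.agency, parameter) == expected

Under this pattern, covering a new agency is a data-only change: one small declarative subclass, mirroring the test_sources subclasses above.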