Skip to content

Commit 690ba41

Browse files
committed
fix(partial records): don't persist partial records
1 parent 0af670e commit 690ba41

3 files changed

Lines changed: 28 additions & 9 deletions

File tree

backend/connectors/usgs/source.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
NWISSiteTransformer,
3333
NWISWaterLevelTransformer,
3434
)
35-
from backend.exceptions import USGSRateLimitError
35+
from backend.exceptions import USGSRateLimitError, PartialOrNoDataError
3636
from backend.source import (
3737
BaseWaterLevelSource,
3838
BaseSiteSource,
@@ -91,6 +91,7 @@ def get_records(self):
9191
# if config.end_date:
9292
# params["endDt"] = config.end_dt.date().isoformat()
9393

94+
data: dict = {}
9495
tries: int = 0
9596

9697
while tries < MAX_RETRIES:
@@ -107,7 +108,7 @@ def get_records(self):
107108
)
108109

109110
if response.status_code == 200:
110-
data: dict = response.json()
111+
data = response.json()
111112
break
112113
elif response.status_code == 429:
113114
raise USGSRateLimitError()
@@ -125,6 +126,10 @@ def get_records(self):
125126
tries += 1
126127
time.sleep(tries)
127128

129+
if data == {}:
130+
self.warn("Failed to retrieve site records after multiple attempts.")
131+
raise PartialOrNoDataError("Failed to retrieve site records after multiple attempts.")
132+
128133
records: list = data.get("features", [])
129134

130135
return records
@@ -168,6 +173,8 @@ def get_records(self, site_record):
168173
list_of_sites
169174
]
170175
}
176+
177+
data: dict = {}
171178
tries: int = 0
172179

173180
while tries < MAX_RETRIES:
@@ -184,7 +191,7 @@ def get_records(self, site_record):
184191
timeout=TIMEOUT,
185192
)
186193
if response.status_code == 200:
187-
data: dict = response.json()
194+
data = response.json()
188195
break
189196
elif response.status_code == 429:
190197
raise USGSRateLimitError()
@@ -202,6 +209,10 @@ def get_records(self, site_record):
202209
tries += 1
203210
time.sleep(tries)
204211

212+
if data == {}:
213+
self.warn("Failed to retrieve water level records after multiple attempts.")
214+
raise PartialOrNoDataError("Failed to retrieve water level records after multiple attempts.")
215+
205216
features: list[dict] = data.get("features", [])
206217

207218
standard_features: list[dict] = [self._standardize_record(feature) for feature in features]
@@ -225,6 +236,7 @@ def get_records(self, site_record):
225236
# USGS APIs use cursor pagination, so we can just follow the "next" links until there are no more
226237
while found_next_link:
227238
tries: int = 0
239+
data: dict = {}
228240
while tries < MAX_RETRIES:
229241
try:
230242
response = httpx.get(
@@ -233,7 +245,7 @@ def get_records(self, site_record):
233245
timeout=TIMEOUT,
234246
)
235247
if response.status_code == 200:
236-
data: dict = response.json()
248+
data = response.json()
237249
break
238250
elif response.status_code == 429:
239251
raise USGSRateLimitError()
@@ -250,6 +262,10 @@ def get_records(self, site_record):
250262
tries += 1
251263
time.sleep(tries)
252264
265+
if data == {}:
266+
self.warn("Failed to retrieve paginated water level records after multiple attempts.")
267+
raise PartialOrNoDataError("Failed to retrieve paginated water level records after multiple attempts.")
268+
253269
features: list[dict] = data.get("features", [])
254270
standard_features: list[dict] = [self._standardize_record(feature) for feature in features]
255271
records.extend(standard_features)

backend/exceptions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
class USGSRateLimitError(Exception):
2+
pass
3+
4+
class PartialOrNoDataError(Exception):
25
pass

backend/unifier.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from backend.persister import BasePersister
2222
from backend.persisters.geoserver import GeoServerPersister
2323
from backend.source import BaseSiteSource
24-
from backend.exceptions import USGSRateLimitError
24+
from backend.exceptions import USGSRateLimitError, PartialOrNoDataError
2525

2626

2727
def health_check(source: BaseSiteSource) -> bool | None:
@@ -171,8 +171,8 @@ def _site_wrapper(site_source, parameter_source, persister, config):
171171
summary_records = parameter_source.read(
172172
site_records, use_summarize, start_ind, end_ind
173173
)
174-
except USGSRateLimitError:
175-
# if a rate limit error is hit we want to remove USGS sites so there aren't partial records
174+
except (USGSRateLimitError, PartialOrNoDataError):
175+
# remove partial records to prevent incomplete data from being saved
176176
persister.sites = persister.sites[:initial_sites_len]
177177
persister.timeseries = persister.timeseries[:initial_timeseries_len]
178178
persister.records = persister.records[:initial_records_len]
@@ -187,8 +187,8 @@ def _site_wrapper(site_source, parameter_source, persister, config):
187187
results = parameter_source.read(
188188
site_records, use_summarize, start_ind, end_ind
189189
)
190-
except USGSRateLimitError:
191-
# if a rate limit error is hit we want to remove USGS sites so there aren't partial records
190+
except (USGSRateLimitError, PartialOrNoDataError):
191+
# remove partial records to prevent incomplete data from being saved
192192
persister.sites = persister.sites[:initial_sites_len]
193193
persister.timeseries = persister.timeseries[:initial_timeseries_len]
194194
break

0 commit comments

Comments
 (0)