diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py
index 5b9009ad..e8c57fd3 100644
--- a/xdmod_data/_http_requester.py
+++ b/xdmod_data/_http_requester.py
@@ -1,3 +1,4 @@
+from datetime import datetime, timedelta
 import json
 import os
 import requests
@@ -41,23 +42,47 @@ def _request_data(self, params):
         )
 
     def _request_raw_data(self, params):
-        url_params = self.__get_raw_data_url_params(params)
         limit = self.__get_raw_data_limit()
+        start_date = datetime.strptime(params['start_date'], '%Y-%m-%d')
+        end_date = datetime.strptime(params['end_date'], '%Y-%m-%d')
         data = []
-        num_rows = limit
-        offset = 0
-        while num_rows == limit:
-            response = self._request_json(
-                path='/rest/v1/warehouse/raw-data?' + url_params
-                + '&offset=' + str(offset)
-            )
-            partial_data = response['data']
-            data += partial_data
-            if params['show_progress']:
-                progress_msg = 'Got ' + str(len(data)) + ' rows...'
-                print(progress_msg, end='\r')
-            num_rows = len(partial_data)
-            offset += limit
+        # Request the date range one day at a time, paging through each day.
+        current_date = start_date
+        while current_date <= end_date:
+            current_date_str = current_date.strftime('%Y-%m-%d')
+            current_params = {
+                **params,
+                **{
+                    'start_date': current_date_str,
+                    'end_date': current_date_str,
+                },
+            }
+            url_params = self.__get_raw_data_url_params(current_params)
+            num_rows = limit
+            offset = 0
+            while num_rows == limit:
+                response = self._request_json(
+                    path='/rest/v1/warehouse/raw-data?' + url_params
+                    + '&offset=' + str(offset)
+                )
+                partial_data = response['data']
+                data += partial_data
+                if params['show_progress']:
+                    progress_msg = self.__show_progress(
+                        start_date,
+                        current_date,
+                        len(data),
+                    )
+                num_rows = len(partial_data)
+                offset += limit
+            current_date = current_date + timedelta(days=1)
+            # Report the completed day only if progress output was requested.
+            if params['show_progress']:
+                progress_msg = self.__show_progress(
+                    start_date,
+                    current_date,
+                    len(data),
+                )
         if params['show_progress']:
             print(progress_msg + 'DONE')
         return (data, response['fields'])
@@ -143,3 +168,21 @@ def __get_raw_data_limit(self):
             response = self._request_json('/rest/v1/warehouse/raw-data/limit')
             self.__raw_data_limit = int(response['data'])
         return self.__raw_data_limit
+
+    def __show_progress(self, start_date, current_date, num_rows):
+        """Print a progress message in place and return it.
+
+        The message reports `num_rows` and the number of days between
+        `start_date` and `current_date`. It is printed with a trailing
+        carriage return instead of a newline.
+        """
+        num_days = (current_date - start_date).days
+        progress_msg = (
+            'Got ' + str(num_rows) + ' row'
+            + ('s' if num_rows != 1 else '')
+            + ' (' + str(num_days) + ' day'
+            + ('s' if num_days != 1 else '')
+            + ')...'
+        )
+        print(progress_msg, end='\r')
+        return progress_msg
diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py
index 17cddef6..99455b0d 100644
--- a/xdmod_data/warehouse.py
+++ b/xdmod_data/warehouse.py
@@ -168,8 +168,8 @@ def get_raw_data(
             only be included whose values for each of the given dimensions
             match one of the corresponding given values.
         show_progress : bool, optional
-            If true, periodically print how many rows have been gotten so
-            far.
+            If true, periodically print how many rows and how many days in
+            the date range have been gotten so far.
 
         Returns
         -------