From eff4a92bd412181caa1b583353a9ab1762f392fd Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 20 Sep 2023 19:56:18 -0400 Subject: [PATCH 1/3] Chunk raw data requests by day. --- xdmod_data/_http_requester.py | 44 +++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index 5b9009ad..73e52e32 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -1,3 +1,4 @@ +from datetime import datetime, timedelta import json import os import requests @@ -41,23 +42,36 @@ def _request_data(self, params): ) def _request_raw_data(self, params): - url_params = self.__get_raw_data_url_params(params) limit = self.__get_raw_data_limit() + start_date = datetime.strptime(params['start_date'], '%Y-%m-%d') + end_date = datetime.strptime(params['end_date'], '%Y-%m-%d') data = [] - num_rows = limit - offset = 0 - while num_rows == limit: - response = self._request_json( - path='/rest/v1/warehouse/raw-data?' + url_params - + '&offset=' + str(offset) - ) - partial_data = response['data'] - data += partial_data - if params['show_progress']: - progress_msg = 'Got ' + str(len(data)) + ' rows...' - print(progress_msg, end='\r') - num_rows = len(partial_data) - offset += limit + current_date = start_date + while current_date <= end_date: + current_date_str = current_date.strftime('%Y-%m-%d') + current_params = { + **params, + **{ + 'start_date': current_date_str, + 'end_date': current_date_str, + }, + } + url_params = self.__get_raw_data_url_params(current_params) + num_rows = limit + offset = 0 + while num_rows == limit: + response = self._request_json( + path='/rest/v1/warehouse/raw-data?' + url_params + + '&offset=' + str(offset) + ) + partial_data = response['data'] + data += partial_data + if params['show_progress']: + progress_msg = 'Got ' + str(len(data)) + ' rows...' + print(progress_msg, end='\r') + num_rows = len(partial_data) + offset += limit + current_date = current_date + timedelta(days=1) if params['show_progress']: print(progress_msg + 'DONE') return (data, response['fields']) From c50779183c7d3341d9cc860d911ae058a9e2e49b Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 21 Sep 2023 11:30:31 -0400 Subject: [PATCH 2/3] Update progress message to print number of days. --- xdmod_data/_http_requester.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index 73e52e32..e8c57fd3 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -67,11 +67,19 @@ def _request_raw_data(self, params): partial_data = response['data'] data += partial_data if params['show_progress']: - progress_msg = 'Got ' + str(len(data)) + ' rows...' - print(progress_msg, end='\r') + progress_msg = self.__show_progress( + start_date, + current_date, + len(data), + ) num_rows = len(partial_data) offset += limit current_date = current_date + timedelta(days=1) + progress_msg = self.__show_progress( + start_date, + current_date, + len(data), + ) if params['show_progress']: print(progress_msg + 'DONE') return (data, response['fields']) @@ -157,3 +165,15 @@ def __get_raw_data_limit(self): response = self._request_json('/rest/v1/warehouse/raw-data/limit') self.__raw_data_limit = int(response['data']) return self.__raw_data_limit + + def __show_progress(self, start_date, current_date, num_rows): + num_days = (current_date - start_date).days + progress_msg = ( + 'Got ' + str(num_rows) + ' row' + + ('s' if num_rows != 1 else '') + + ' (' + str(num_days) + ' day' + + ('s' if num_days != 1 else '') + + ')...' + ) + print(progress_msg, end='\r') + return progress_msg From f53030fd24c6db322a8da33b7f716d70c5c618fc Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 21 Sep 2023 16:18:56 -0400 Subject: [PATCH 3/3] Update documentation. --- xdmod_data/warehouse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index 17cddef6..99455b0d 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -168,8 +168,8 @@ def get_raw_data( only be included whose values for each of the given dimensions match one of the corresponding given values. show_progress : bool, optional - If true, periodically print how many rows have been gotten so - far. + If true, periodically print how many rows and how many days in + the date range have been gotten so far. Returns -------