From 6a29f8b55c75ea07c193a6c28afe37a1ce399738 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 22 Nov 2023 13:14:16 -0500 Subject: [PATCH 1/3] Use streaming for raw data requests. --- CHANGELOG.md | 1 + xdmod_data/_http_requester.py | 78 +++++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aada8df2..ddfeaca4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Remove limit on number of results returned from `get_filter_values()` ([\#21](https://github.com/ubccr/xdmod-data/pull/21)). - Add a "Feedback / Feature Requests" section to the README ([\#22](https://github.com/ubccr/xdmod-notebooks/pull/22)). - Improve performance of validation of filters and raw fields ([\#18](https://github.com/ubccr/xdmod-data/pull/18)). +- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)). ## v1.0.0 (2023-07-21) - Initial release. diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index d90e3af7..1a4f736a 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -42,24 +42,53 @@ def _request_data(self, params): def _request_raw_data(self, params): url_params = self.__get_raw_data_url_params(params) + # Once XDMoD 10.5 is no longer supported, there will be no need to call + # __get_raw_data_limit(), and the if/else statement below will not be + # necessary — only the body of the 'if' branch will be needed. limit = self.__get_raw_data_limit() data = [] - num_rows = limit - offset = 0 - while num_rows == limit: - response = self._request_json( - path='/rest/v1/warehouse/raw-data?' + url_params - + '&offset=' + str(offset) + if limit == 'NA': + response_iter_lines = self.__request( + path='/rest/v1/warehouse/raw-data?' + url_params, + post_fields=None, + stream=True, ) - partial_data = response['data'] - data += partial_data + response_text = '' + i = 0 + for line in response_iter_lines: + response_text += line.decode('utf-8') + if params['show_progress']: + progress_msg = ( + 'Got ' + str(i) + ' row' + ('' if i == 1 else 's') + + '...' + ) + print(progress_msg, end='\r') + i += 1 if params['show_progress']: - progress_msg = 'Got ' + str(len(data)) + ' rows...' - print(progress_msg, end='\r') - num_rows = len(partial_data) - offset += limit - if params['show_progress']: - print(progress_msg + 'DONE') + print(progress_msg + 'DONE') + response = json.loads(response_text) + data = response['data'] + else: + num_rows = limit + offset = 0 + while num_rows == limit: + response = self._request_json( + path='/rest/v1/warehouse/raw-data?' + url_params + + '&offset=' + str(offset) + ) + partial_data = response['data'] + data += partial_data + if params['show_progress']: + progress_msg = ( + 'Got ' + str(len(data)) + ' row' + + ('' if len(data) == 1 else 's') + + '...' + ) + print(progress_msg, end='\r') + num_rows = len(partial_data) + offset += limit + if params['show_progress']: + print(progress_msg + 'DONE') return (data, response['fields']) def _request_filter_values(self, realm_id, dimension_id): @@ -96,7 +125,7 @@ def __assert_connection_to_xdmod_host(self): + '\': ' + str(e) ) from None - def __request(self, path='', post_fields=None): + def __request(self, path='', post_fields=None, stream=False): _validator._assert_runtime_context(self.__in_runtime_context) url = self.__xdmod_host + path if post_fields: @@ -124,7 +153,10 @@ def __request(self, path='', post_fields=None): raise RuntimeError( 'Error ' + str(response.status_code) + msg ) from None - return response.text + if stream: + return response.iter_lines() + else: + return response.text def __get_data_post_fields(self, params): post_fields = { @@ -159,8 +191,18 @@ def __get_raw_data_url_params(self, params): ) return urlencode(results) + # Once XDMoD 10.5 is no longer supported, there will be no need for this + # method. def __get_raw_data_limit(self): if self.__raw_data_limit is None: - response = self._request_json('/rest/v1/warehouse/raw-data/limit') - self.__raw_data_limit = int(response['data']) + try: + response = self._request_json( + '/rest/v1/warehouse/raw-data/limit' + ) + self.__raw_data_limit = int(response['data']) + except RuntimeError as e: + if '404' in str(e): + self.__raw_data_limit = 'NA' + else: + raise return self.__raw_data_limit From 5e2d94977bc72f43f1f606658b9ad6ff6cfb1083 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 22 May 2024 14:10:30 -0400 Subject: [PATCH 2/3] Change to stream JSON text sequence. --- xdmod_data/_http_requester.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index 1a4f736a..4ebd0492 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -53,21 +53,23 @@ def _request_raw_data(self, params): post_fields=None, stream=True, ) - response_text = '' i = 0 for line in response_iter_lines: - response_text += line.decode('utf-8') - if params['show_progress']: - progress_msg = ( - 'Got ' + str(i) + ' row' + ('' if i == 1 else 's') - + '...' - ) - print(progress_msg, end='\r') + line_text = line.decode('utf-8').replace('\x1e', '') + line_json = json.loads(line_text) + if i == 0: + response = {'fields': line_json} + else: + data.append(line_json) + if params['show_progress']: + progress_msg = ( + 'Got ' + str(i) + ' row' + ('' if i == 1 else 's') + + '...' + ) + print(progress_msg, end='\r') i += 1 if params['show_progress']: print(progress_msg + 'DONE') - response = json.loads(response_text) - data = response['data'] else: num_rows = limit offset = 0 From 582123508546905e8a6d618fb3049e0b0b8ef78d Mon Sep 17 00:00:00 2001 From: Aaron Weeden <31246768+aaronweeden@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:30:03 -0400 Subject: [PATCH 3/3] Add period to end of changelog entry. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea84400a..52c4c3bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - Add a "Feedback / Feature Requests" section to the README ([\#22](https://github.com/ubccr/xdmod-notebooks/pull/22)). - Improve performance of validation of filters and raw fields ([\#18](https://github.com/ubccr/xdmod-data/pull/18)). - Fix bug with trailing slashes in `xdmod_host` ([\#24](https://github.com/ubccr/xdmod-data/pull/24)). -- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)) +- Use streaming for raw data requests ([\#19](https://github.com/ubccr/xdmod-data/pull/19)). ## v1.0.0 (2023-07-21) - Initial release.