From e44167e5ab2ee40ce0b64243466bba1c9afd5811 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 6 Aug 2025 11:17:07 -0400 Subject: [PATCH 1/3] Warn instead of throwing exception if raw data stream is closed unexpectedly. --- xdmod_data/__version__.py | 2 +- xdmod_data/_http_requester.py | 5 +++-- xdmod_data/warehouse.py | 13 ++++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/xdmod_data/__version__.py b/xdmod_data/__version__.py index 57e59e58..ed63fb7b 100644 --- a/xdmod_data/__version__.py +++ b/xdmod_data/__version__.py @@ -1,2 +1,2 @@ __title__ = 'xdmod-data' -__version__ = '1.1.0' +__version__ = '1.1.1.dev1' diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index ea76a416..ef20f3b6 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -8,7 +8,8 @@ class _HttpRequester: - def __init__(self, xdmod_host): + def __init__(self, xdmod_host, logger): + self.__logger = logger self.__in_runtime_context = False _validator._assert_str('xdmod_host', xdmod_host) xdmod_host = re.sub('/+$', '', xdmod_host) @@ -89,7 +90,7 @@ def _request_raw_data(self, params): if params['show_progress']: self.__print_progress_msg(num_rows_read, 'DONE\n') if last_line_size != '0': # pragma: no cover - raise RuntimeError( + self.__logger.warning( 'Connection closed before all data were received!' + ' You may need to break your request into smaller' + ' chunks by running `get_raw_data()` multiple times with' diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index ba016f76..db3910a6 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -1,3 +1,4 @@ +import logging import numpy as np import os import pandas as pd @@ -50,7 +51,8 @@ def __init__(self, xdmod_host=None): '`xdmod_host` parameter or `XDMOD_HOST` environment' + ' variable must be set.', ) from None - self.__http_requester = _HttpRequester(xdmod_host) + self.__logger = self.__init_logger() + self.__http_requester = _HttpRequester(xdmod_host, self.__logger) self.__descriptors = _Descriptors(self.__http_requester) def __enter__(self): @@ -456,6 +458,15 @@ def _get_dimension_label(self, realm, dimension_id): d = self.__descriptors._get_aggregate() return d[realm]['dimensions'][dimension_id]['label'] + def __init_logger(self): + logger = logging.getLogger('xdmod_data_warehouse') + logger.setLevel(logging.WARNING) + formatter = logging.Formatter('Warning: %(message)s') + handler = logging.StreamHandler() + handler.setFormatter(formatter) + logger.addHandler(handler) + return logger + def __get_data_frame(self, data, column_data, index=None): result = pd.DataFrame( data=data, From 510d24a36132f3ea645cb0abc5669513cc1757bc Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 6 Aug 2025 16:11:56 -0400 Subject: [PATCH 2/3] Update. --- xdmod_data/_http_requester.py | 66 +++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index ef20f3b6..e136faf1 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -66,37 +66,41 @@ def _request_raw_data(self, params): # line contains the row we care about and the first line # contains the hex size of the second line. is_first_line_in_pair = True - for line in response.iter_lines(): - # There is a bug in Requests (see - # https://github.com/psf/requests/issues/5540) such that empty - # lines are occasionally sent via iter_lines(); ignore these. - if line == b'': - continue - line_text = line.decode('utf-8') - if is_first_line_in_pair: - last_line_size = line_text - # The last line will be of size 0 and should not be - # processed. - elif last_line_size != '0': # pragma: no branch - (data, fields) = self.__process_raw_data_response_row( - line_text, - num_rows_read, - params['show_progress'], - data, - fields, - ) - num_rows_read += 1 - is_first_line_in_pair = not is_first_line_in_pair - if params['show_progress']: - self.__print_progress_msg(num_rows_read, 'DONE\n') - if last_line_size != '0': # pragma: no cover - self.__logger.warning( - 'Connection closed before all data were received!' - + ' You may need to break your request into smaller' - + ' chunks by running `get_raw_data()` multiple times with' - + ' fewer days specified for `duration` and then piecing' - + ' the resulting data frames back together.', - ) + connection_closed_warning_msg = ( + 'Connection closed before all data were received!' + + ' You may need to break your request into smaller' + + ' chunks by running `get_raw_data()` multiple times with' + + ' fewer days specified for `duration` and then piecing' + + ' the resulting data frames back together.' + ) + try: + for line in response.iter_lines(): + # There is a bug in Requests (see + # https://github.com/psf/requests/issues/5540) such that empty + # lines are occasionally sent via iter_lines(); ignore these. + if line == b'': + continue + line_text = line.decode('utf-8') + if is_first_line_in_pair: + last_line_size = line_text + # The last line will be of size 0 and should not be + # processed. + elif last_line_size != '0': # pragma: no branch + (data, fields) = self.__process_raw_data_response_row( + line_text, + num_rows_read, + params['show_progress'], + data, + fields, + ) + num_rows_read += 1 + is_first_line_in_pair = not is_first_line_in_pair + if params['show_progress']: + self.__print_progress_msg(num_rows_read, 'DONE\n') + if last_line_size != '0': # pragma: no cover + self.__logger.warning(connection_closed_warning_msg) + except requests.exceptions.ChunkedEncodingError: # pragma: no cover + self.__logger.warning(connection_closed_warning_msg) return (data, fields) def _request_filter_values(self, realm_id, dimension_id): From a690be287155e79db98c72f125428b560b562ac4 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Wed, 6 Aug 2025 16:18:00 -0400 Subject: [PATCH 3/3] Update. --- tests/ci/scripts/run-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/scripts/run-tests.sh b/tests/ci/scripts/run-tests.sh index 4835fb39..f0b0330c 100755 --- a/tests/ci/scripts/run-tests.sh +++ b/tests/ci/scripts/run-tests.sh @@ -24,7 +24,7 @@ for python_container in $python_containers; do docker exec $python_container bash -c 'sudo chown -R circleci:circleci /home/circleci/project' docker exec -w /home/circleci/project $python_container bash -c 'python3 -m pip install --upgrade pip' docker exec -w /home/circleci/project $python_container bash -c 'python3 -m pip install --upgrade flake8 flake8-commas flake8-quotes' - docker exec -w /home/circleci/project $python_container bash -c 'python3 -m flake8 . --max-complexity=10 --max-line-length=160 --show-source --exclude __init__.py' + docker exec -w /home/circleci/project $python_container bash -c 'python3 -m flake8 . --max-complexity=12 --max-line-length=160 --show-source --exclude __init__.py' docker exec -w /home/circleci/project $python_container bash -c 'python3 -m pip install -e .[report]' docker exec -w /home/circleci/project $python_container bash -c 'python3 -m pip install --upgrade python-dotenv pytest coverage' # The minimum version of each dependency should be tested in the