diff --git a/tests/ci/scripts/run-tests.sh b/tests/ci/scripts/run-tests.sh index 4835fb39..c2795429 100755 --- a/tests/ci/scripts/run-tests.sh +++ b/tests/ci/scripts/run-tests.sh @@ -5,7 +5,7 @@ set -exo pipefail export MIN_PYTHON_VERSION=3.8 -export MAX_PYTHON_VERSION=3.13 +export MAX_PYTHON_VERSION=3.14 export XDMOD_11_0_IMAGE=tools-ext-01.ccr.xdmod.org/xdmod:x86_64-rockylinux8.9.20231119-v11.0.0-1.0-03 BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tests/integration/test_datawarehouse_integration.py b/tests/integration/test_datawarehouse_integration.py index 41990bae..7b347257 100644 --- a/tests/integration/test_datawarehouse_integration.py +++ b/tests/integration/test_datawarehouse_integration.py @@ -50,24 +50,38 @@ 'show_progress': False, 'service_provider': 'screw', } + + +def __get_key_error_test_match(param): + realm_text = '' + if param in ['metric', 'dimension']: + realm_text = f' in the \'{VALID_VALUES["realm"]}\' realm' + return f"Value for `{param}` not found{realm_text}: '{INVALID_STR}'" + + KEY_ERROR_TEST_VALUES_AND_MATCHES = { - 'duration': (INVALID_STR, 'Invalid value for `duration`'), - 'realm': (INVALID_STR, r'Realm .* not found'), - 'metric': (INVALID_STR, r'Metric .* not found'), - 'dimension': (INVALID_STR, r'Dimension .* not found'), - 'filter_key': ({INVALID_STR: INVALID_STR}, r'Dimension .* not found'), - 'filter_value': ( - {VALID_DIMENSION: INVALID_STR}, - r'Filter value .* not found', - ), - 'dataset_type': (INVALID_STR, 'Invalid value for `dataset_type`'), - 'aggregation_unit': (INVALID_STR, 'Invalid value for `aggregation_unit`'), - 'parameter': ( + 'filters:key': [ + {INVALID_STR: INVALID_STR}, + __get_key_error_test_match('dimension'), + ], + 'parameter': [ INVALID_STR, 'Parameter .* does not have a list of valid values', - ), - 'field': (INVALID_STR, r'Field .* not found'), + ], } +for param in [ + 'duration', + 'realm', + 'metric', + 'dimension', + 'dataset_type', + 'aggregation_unit', + 'field', +]: + KEY_ERROR_TEST_VALUES_AND_MATCHES[param] = [ + INVALID_STR, + __get_key_error_test_match(param), + ] key_error_test_ids = [] duration_test_ids = [] @@ -110,10 +124,9 @@ ] value_error_test_methods += [method] if 'filters' in METHOD_PARAMS[method]: - for param in ('filter_key', 'filter_value'): - key_error_test_ids += [method + ':' + param] - (value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES[param] - key_error_test_params += [(method, {'filters': value}, match)] + key_error_test_ids += [method + ':filters:key'] + (value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES['filters:key'] + key_error_test_params += [(method, {'filters': value}, match)] load_dotenv(Path(os.path.expanduser(TOKEN_PATH)), override=True) @@ -489,3 +502,44 @@ def test_get_resources_invalid_service_provider(dw_methods): # get_resources is not supported in XDMoD < 11.0.2. if XDMOD_VERSION != 'xdmod-11-0': assert result == [] + + +def test_deprecated_raw_field(dw_methods): + with pytest.warns( + FutureWarning, + match=( + "The field name 'Organization' in the 'Jobs' realm is deprecated" + ' and will be removed in a future version of XDMoD. Use' + " 'User Institution' instead." + ), + ): + __run_method( + dw_methods, + 'get_raw_data', + { + 'duration': ['0000-01-01', '0000-01-01'], + 'realm': 'Jobs', + 'fields': ['Organization'], + }, + ) + + +filters_methods = [ + method for method, params in METHOD_PARAMS.items() if 'filters' in params +] + + +@pytest.mark.parametrize('method', filters_methods) +def test_invalid_filter_value(dw_methods, method): + with pytest.warns( + UserWarning, + match=( + f"Filter value not found for the '{VALID_DIMENSION}' dimension in" + f" the '{VALID_VALUES['realm']}' realm: '{INVALID_STR}'" + ), + ): + __run_method( + dw_methods, + method, + {'filters': {VALID_DIMENSION: INVALID_STR}}, + ) diff --git a/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv b/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv index 42a56249..3c76b055 100644 --- a/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv +++ b/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv @@ -1,4 +1,4 @@ -,Local Job Id,Resource,Timezone,System Username (Deidentified),User,Organization,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group +,Local Job Id,Resource,Timezone,System Username (Deidentified),User,User Institution,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group 0,6110386,Robertson,UTC,4003ebd9b2de239734970b4cf32cc3d6183d2c6b,"Harrier, Hen",Screwdriver,curry,1483118438,1483118438,1483118443,1483118438,1,1,0,-1,5,0,5,0,0:0,COMPLETED,1,48000Mn,172800,white,Computer and Information Science and Engineering,Computer and Computation Research,Computer and Computation Theory 1000,970339,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483064122,1483064124,1483064151,1483064122,1,8,0,-1,27,2,216,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology 2000,981731,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483102441,1483102444,1483102466,1483102441,1,8,0,-1,22,3,176,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology diff --git a/tests/regression/test_datawarehouse_regression.py b/tests/regression/test_datawarehouse_regression.py index 4c11352d..8bf84b35 100644 --- a/tests/regression/test_datawarehouse_regression.py +++ b/tests/regression/test_datawarehouse_regression.py @@ -240,7 +240,7 @@ def test_get_durations(valid_dw): @pytest.mark.parametrize( - 'service_provider', [[None], ['screw']], + 'service_provider', [[None], ['screw']], ids=['none', 'not-none'], ) def test_get_resources(valid_dw, service_provider): # get_resources is not supported in XDMoD < 11.0.2. diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 5826737e..a344ae22 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -1,21 +1,76 @@ -class _Descriptors: +import pandas as pd +import xdmod_data._utilities as _utilities +import xdmod_data._validator as _validator + + +class _Descriptor: def __init__(self, http_requester): self.__http_requester = http_requester - self.__aggregate = None - self.__raw = None + self.__cached = None + + def _get_data_frame( + self, + data_type, + realm=None, + drop_deprecation_columns=True, + ): + if self.__cached is None: + self.__cached = self._request(self.__http_requester) + if realm is not None: + realm_id = self._get_data_id('realms', realm) + descriptor = self.__cached + if data_type != 'realms': + descriptor = descriptor[realm_id][data_type] + data_frame = pd.DataFrame.from_dict( + descriptor, + orient='index', + dtype='string', + ) + if data_type == 'realms': + data_frame = data_frame['label'].to_frame() + data_frame = data_frame.rename_axis('id') + if drop_deprecation_columns: + data_frame = data_frame.drop( + columns=['deprecated', 'deprecated_names'], + errors='ignore', + ) + data_frame.index = data_frame.index.astype('string') + data_frame.columns = data_frame.columns.astype('string') + return data_frame + + def _get_data_id(self, data_type, value, realm=None): + param_name = data_type.rstrip('s') + _validator._assert_str(param_name, value) + data_frame = self._get_data_frame( + data_type, + realm, + drop_deprecation_columns=False, + ) + data_id = _utilities._get_id_from_data_frame( + value, + data_frame, + param_name, + realm, + ) + if data_id is None: + realm_text = ( + f" in the '{realm}' realm" if realm is not None else '' + ) + raise KeyError( + f"Value for `{param_name}` not found{realm_text}: '{value}'", + ) from None + return data_id - def _get_aggregate(self): - if self.__aggregate is None: - self.__aggregate = self.__request_aggregate() - return self.__aggregate + def _get_label_from_id(self, data_type, data_id, realm=None): + if data_type == 'dimensions' and data_id == 'none': + return None + data_frame = self._get_data_frame(data_type, realm) + return data_frame.loc[data_id, 'label'] - def _get_raw(self): - if self.__raw is None: - self.__raw = self.__request_raw() - return self.__raw - def __request_aggregate(self): - response = self.__http_requester._request_json( +class _AggregateDescriptor(_Descriptor): + def _request(self, http_requester): + response = http_requester._request_json( '/controllers/metric_explorer.php', {'operation': 'get_dw_descripter'}, ) @@ -23,15 +78,7 @@ def __request_aggregate(self): raise RuntimeError( 'Descriptor received with unexpected structure.', ) - return self.__deserialize_aggregate(response['data'][0]['realms']) - - def __request_raw(self): - response = self.__http_requester._request_json( - '/rest/v1/warehouse/export/realms', - ) - return self.__deserialize_raw(response['data']) - - def __deserialize_aggregate(self, serialized_descriptor): + serialized_descriptor = response['data'][0]['realms'] result = {} for realm in serialized_descriptor: result[realm] = {'label': serialized_descriptor[realm]['category']} @@ -45,7 +92,13 @@ def __deserialize_aggregate(self, serialized_descriptor): } return result - def __deserialize_raw(self, serialized_descriptor): + +class _RawDescriptor(_Descriptor): + def _request(self, http_requester): + response = http_requester._request_json( + '/rest/v1/warehouse/export/realms', + ) + serialized_descriptor = response['data'] result = {} for realm in serialized_descriptor: realm_id = realm['id'] @@ -53,8 +106,13 @@ def __deserialize_raw(self, serialized_descriptor): result[realm_id]['fields'] = {} fields = realm['fields'] for field in fields: - result[realm_id]['fields'][field['alias']] = { + r = { 'label': field['display'], 'description': field['documentation'], } + if 'deprecated' in field: + r['deprecated'] = field['deprecated'] + if 'deprecatedNames' in field: + r['deprecated_names'] = field['deprecatedNames'] + result[realm_id]['fields'][field['alias']] = r return result diff --git a/xdmod_data/_response_processor.py b/xdmod_data/_response_processor.py index e015634f..0faeeab0 100644 --- a/xdmod_data/_response_processor.py +++ b/xdmod_data/_response_processor.py @@ -6,23 +6,25 @@ import re -def _process_get_data_response(dw, params, response): - params['metric'] = dw._get_metric_label( - params['realm'], +def _process_get_data_response(aggregate_descriptor, params, response): + params['metric'] = aggregate_descriptor._get_label_from_id( + 'metrics', params['metric'], - ) - params['dimension'] = dw._get_dimension_label( params['realm'], + ) + params['dimension'] = aggregate_descriptor._get_label_from_id( + 'dimensions', params['dimension'], + params['realm'], ) csv_data = csv.reader(response.splitlines()) if params['dataset_type'] == 'timeseries': - return __parse_timeseries_csv_data(dw, params, csv_data) + return __parse_timeseries_csv_data(params, csv_data) else: return __parse_aggregate_csv_data(params, csv_data) -def __parse_timeseries_csv_data(dw, params, csv_data): +def __parse_timeseries_csv_data(params, csv_data): time_values = [] data = [] for line_num, line in enumerate(csv_data): @@ -32,7 +34,6 @@ def __parse_timeseries_csv_data(dw, params, csv_data): time_values.append(__parse_timeseries_date_string(line[0])) data.append(np.asarray(line[1:])) return __get_timeseries_data_frame( - dw, params, data, time_values, @@ -80,7 +81,6 @@ def __parse_timeseries_date_string(date_string): def __get_timeseries_data_frame( - dw, params, data, time_values, @@ -93,11 +93,7 @@ def __get_timeseries_data_frame( dtype='datetime64[ns]', name='Time', ), - columns=__get_timeseries_data_frame_columns( - dw, - params, - dimension_values, - ), + columns=__get_timeseries_data_frame_columns(params, dimension_values), dtype='Float64', ).fillna(value=np.nan) @@ -141,7 +137,7 @@ def __parse_quarter_date_string(date_string): return (date_string, format_) -def __get_timeseries_data_frame_columns(dw, params, dimension_values): +def __get_timeseries_data_frame_columns(params, dimension_values): if params['dimension'] is None: columns = pd.Series( data=params['metric'], diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py new file mode 100644 index 00000000..07243269 --- /dev/null +++ b/xdmod_data/_utilities.py @@ -0,0 +1,75 @@ +import pandas as pd +import warnings + + +def _get_id_from_data_frame( + value, + data_frame, + data_type_label, + realm=None, +): + mask = ( + (data_frame.index == value) + | (data_frame['label'] == value) + ) + deprecated_names_mask = pd.Series(False, index=data_frame.index) + if 'deprecated_names' in data_frame.columns: + deprecated_names_mask = data_frame['deprecated_names'].apply( + lambda deprecated_names: ( + not pd.isna(deprecated_names) + and value in deprecated_names + ), + ) + mask |= deprecated_names_mask + matches = data_frame.index[mask] + if matches.empty: + return None + data_id = matches[0] + __warn_if_deprecated( + value, + data_frame, + data_type_label, + data_id, + deprecated_names_mask, + realm, + ) + return data_id + + +def __warn_if_deprecated( + name, + data_frame, + data_type_label, + data_id, + deprecated_names_mask, + realm, +): + realm_text = '' + warn = False + if realm is not None: + realm_text = f" in the '{realm}' realm" + if 'deprecated' in data_frame.columns: + deprecated = data_frame.loc[data_id, 'deprecated'] + if not pd.isna(deprecated) and deprecated: + alternative_text = data_frame.loc[data_id, 'description'].replace( + 'DEPRECATED: ', + '', + ) + warn = True + if not warn and deprecated_names_mask.any(): + label = data_frame.loc[data_id, 'label'] + alternative = label + if data_id != label: + alternative = f"{data_id}' or '{label}" + alternative_text = f"Use '{alternative}' instead." + warn = True + if warn: + warnings.warn( + ( + f"The {data_type_label} name '{name}'{realm_text} is" + f' deprecated and will be removed in a future version of' + f" XDMoD. {alternative_text}" + ), + FutureWarning, + stacklevel=7, + ) diff --git a/xdmod_data/_validator.py b/xdmod_data/_validator.py index 646aa7f1..0dfeac0b 100644 --- a/xdmod_data/_validator.py +++ b/xdmod_data/_validator.py @@ -1,4 +1,6 @@ from datetime import date, timedelta +import xdmod_data._utilities as _utilities +import warnings def _assert_str(name, value): @@ -14,25 +16,28 @@ def _assert_runtime_context(in_runtime_context): ) -def _validate_get_data_params(data_warehouse, descriptors, params): +def _validate_get_data_params(data_warehouse, aggregate_descriptor, params): results = {} (results['start_date'], results['end_date']) = ( __validate_duration(params['duration']) ) - results['realm'] = _find_realm_id(descriptors, params['realm']) - results['metric'] = __find_metric_id( - descriptors, - results['realm'], - params['metric'], + results['realm'] = aggregate_descriptor._get_data_id( + 'realms', + params['realm'], ) - results['dimension'] = _find_dimension_id( - descriptors, + results['metric'] = aggregate_descriptor._get_data_id( + 'metrics', + params['metric'], results['realm'], + ) + results['dimension'] = aggregate_descriptor._get_data_id( + 'dimensions', params['dimension'], + results['realm'], ) results['filters'] = __validate_filters( data_warehouse, - descriptors, + aggregate_descriptor, results['realm'], params['filters'], ) @@ -49,21 +54,26 @@ def _validate_get_data_params(data_warehouse, descriptors, params): return results -def _validate_get_raw_data_params(data_warehouse, descriptors, params): +def _validate_get_raw_data_params( + data_warehouse, + aggregate_descriptor, + raw_descriptor, + params, +): results = {} (results['start_date'], results['end_date']) = ( __validate_duration(params['duration']) ) - results['realm'] = _find_raw_realm_id(descriptors, params['realm']) + results['realm'] = raw_descriptor._get_data_id('realms', params['realm']) results['fields'] = __validate_raw_fields( - data_warehouse, - params['realm'], + raw_descriptor, + results['realm'], params['fields'], ) results['filters'] = __validate_filters( data_warehouse, - descriptors, - params['realm'], + aggregate_descriptor, + results['realm'], params['filters'], ) results['show_progress'] = __assert_bool( @@ -73,23 +83,6 @@ def _validate_get_raw_data_params(data_warehouse, descriptors, params): return results -def _find_realm_id(descriptors, realm): - return __find_id_in_descriptor( - descriptors._get_aggregate(), - 'realm', - realm, - ) - - -def _find_dimension_id(descriptors, realm, dimension): - return __find_metric_or_dimension_id( - descriptors, - realm, - 'dimension', - dimension, - ) - - def _get_durations(): this_year = date.today().year six_years_ago = this_year - 6 @@ -128,14 +121,6 @@ def _get_aggregation_units(): ) -def _find_raw_realm_id(descriptors, realm): - return __find_id_in_descriptor( - descriptors._get_raw(), - 'realm', - realm, - ) - - def __assert_type(name, value, type_, type_name): if not isinstance(value, type_): raise TypeError('`' + name + '` must be a ' + type_name + '.') @@ -161,20 +146,15 @@ def __validate_duration(duration): return (start_date, end_date) -def __find_metric_id(descriptors, realm, metric): - return __find_metric_or_dimension_id( - descriptors, - realm, - 'metric', - metric, - ) - - -def __validate_filters(data_warehouse, descriptors, realm, filters): +def __validate_filters(data_warehouse, aggregate_descriptor, realm, filters): try: result = {} for dimension in filters: - dimension_id = _find_dimension_id(descriptors, realm, dimension) + dimension_id = aggregate_descriptor._get_data_id( + 'dimensions', + dimension, + realm, + ) filter_values = filters[dimension] if isinstance(filter_values, str): filter_values = [filter_values] @@ -184,12 +164,24 @@ def __validate_filters(data_warehouse, descriptors, realm, filters): dimension, ) for filter_value in filter_values: - new_filter_value = __find_value_in_df( - 'Filter value', - valid_filter_values, + new_filter_value = _utilities._get_id_from_data_frame( filter_value, + valid_filter_values, + 'filter value', + realm, ) - result[dimension_id].append(new_filter_value) + if new_filter_value is None: + warnings.warn( + ( + f"Filter value not found for the '{dimension}'" + f" dimension in the '{realm}' realm:" + f' {filter_value!r}' + ), + UserWarning, + stacklevel=4, + ) + else: + result[dimension_id].append(new_filter_value) return result except TypeError: raise TypeError( @@ -209,19 +201,23 @@ def __find_str_in_sequence(value, sequence, label): transformed_valid_value = __lowercase_and_remove_spaces(valid_value) if transformed_valid_value == transformed_value: return valid_value + sequence_str = "', '".join(sequence) raise KeyError( - 'Invalid value for `' + label + "`: '" + value + "'" - + ". Valid values are: '" + "', '".join(sequence) + "'.", + f"Value for `{label}` not found: '{value}'. Valid values are:" + f" '{sequence_str}'.", ) from None -def __validate_raw_fields(data_warehouse, realm, fields): +def __validate_raw_fields(raw_descriptor, realm, fields): try: results = [] - valid_raw_fields = data_warehouse.describe_raw_fields(realm) for field in fields: - new_field = __find_value_in_df('Field', valid_raw_fields, field) - results.append(new_field) + field_id = raw_descriptor._get_data_id('fields', field, realm) + if field_id is None: + raise KeyError( + f"Raw field not found in the {realm} realm: '{field}'.", + ) from None + results.append(field_id) return results except TypeError: raise TypeError( @@ -229,24 +225,6 @@ def __validate_raw_fields(data_warehouse, realm, fields): ) from None -def __find_id_in_descriptor(descriptor, name, value): - _assert_str(name, value) - for id_ in descriptor: - if id_ == value or descriptor[id_]['label'] == value: - return id_ - raise KeyError( - name.capitalize() + " '" + value + "' not found.", - ) - - -def __find_metric_or_dimension_id(descriptors, realm, m_or_d, value): - return __find_id_in_descriptor( - descriptors._get_aggregate()[realm][m_or_d + 's'], - m_or_d, - value, - ) - - def __get_dates_from_duration(duration): today = date.today() yesterday = today + timedelta(days=-1) @@ -315,15 +293,6 @@ def __get_dates_from_duration(duration): return durations_to_dates[duration] -def __find_value_in_df(label, df, value): - if value in df.index: - return value - elif value in df['label'].values: - return df.index[df['label'] == value].tolist()[0] - else: - raise KeyError(label + " '" + value + "' not found.") - - def __lowercase_and_remove_spaces(value): return value.lower().replace(' ', '') diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index ba016f76..896d263b 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -1,7 +1,7 @@ import numpy as np import os import pandas as pd -from xdmod_data._descriptors import _Descriptors +from xdmod_data._descriptors import _AggregateDescriptor, _RawDescriptor from xdmod_data._http_requester import _HttpRequester import xdmod_data._response_processor as _response_processor import xdmod_data._validator as _validator @@ -51,7 +51,10 @@ def __init__(self, xdmod_host=None): + ' variable must be set.', ) from None self.__http_requester = _HttpRequester(xdmod_host) - self.__descriptors = _Descriptors(self.__http_requester) + self.__aggregate_descriptor = _AggregateDescriptor( + self.__http_requester, + ) + self.__raw_descriptor = _RawDescriptor(self.__http_requester) def __enter__(self): self.__in_runtime_context = True @@ -143,16 +146,24 @@ def get_data( If any of the arguments are of the wrong type. ValueError If `duration` is an object but not of length 2. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`, `metric`, + `dimension`, and/or one of the keys of `filters`. + UserWarning + If a value in `filters` is provided that is not found. """ _validator._assert_runtime_context(self.__in_runtime_context) params = _validator._validate_get_data_params( self, - self.__descriptors, + self.__aggregate_descriptor, locals(), ) response = self.__http_requester._request_data(params) return _response_processor._process_get_data_response( - self, + self.__aggregate_descriptor, params, response.text, ) @@ -212,15 +223,29 @@ def get_raw_data( If any of the arguments are of the wrong type. ValueError If `duration` is an object but not of length 2. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`, `fields`, and/or + one of the keys of `filters`. + UserWarning + If a value in `filters` is provided that is not found. """ _validator._assert_runtime_context(self.__in_runtime_context) params = _validator._validate_get_raw_data_params( self, - self.__descriptors, + self.__aggregate_descriptor, + self.__raw_descriptor, locals(), ) (data, column_data) = self.__http_requester._request_raw_data(params) - return self.__get_data_frame(data, column_data) + result = pd.DataFrame( + data, + columns=pd.Series(column_data, dtype='string'), + dtype='string', + ).fillna(value=np.nan) + return result def describe_realms(self): """Get a data frame describing the valid realms in the data warehouse. @@ -237,11 +262,7 @@ def describe_realms(self): there is an error requesting data from the warehouse. """ _validator._assert_runtime_context(self.__in_runtime_context) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_aggregate(), - ('id', 'label'), - 'id', - ) + return self.__aggregate_descriptor._get_data_frame('realms') def describe_metrics(self, realm): """Get a data frame describing the valid metrics for the given realm. @@ -267,8 +288,14 @@ def describe_metrics(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ - return self.__describe_metrics_or_dimensions(realm, 'metrics') + _validator._assert_runtime_context(self.__in_runtime_context) + return self.__aggregate_descriptor._get_data_frame('metrics', realm) def describe_dimensions(self, realm): """Get a data frame describing the valid dimensions for the given @@ -295,8 +322,14 @@ def describe_dimensions(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ - return self.__describe_metrics_or_dimensions(realm, 'dimensions') + _validator._assert_runtime_context(self.__in_runtime_context) + return self.__aggregate_descriptor._get_data_frame('dimensions', realm) def get_filter_values(self, realm, dimension): """Get a data frame containing the valid filter values for the given @@ -328,20 +361,28 @@ def get_filter_values(self, realm, dimension): there is an error requesting data from the warehouse. TypeError If `realm` or `dimension` are not strings. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm` or `dimension`. """ _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_realm_id(self.__descriptors, realm) - dimension_id = _validator._find_dimension_id( - self.__descriptors, - realm_id, + realm_id = self.__aggregate_descriptor._get_data_id('realms', realm) + dimension_id = self.__aggregate_descriptor._get_data_id( + 'dimensions', dimension, + realm_id, ) response_data = self.__http_requester._request_filter_values( realm_id, dimension_id, ) - data = [(datum['id'], datum['name']) for datum in response_data] - result = self.__get_data_frame(data, ('id', 'label'), 'id') + result = pd.DataFrame( + data=[(datum['id'], datum['name']) for datum in response_data], + columns=pd.Series(['id', 'label'], dtype='string'), + dtype='string', + ).set_index('id') return result def get_durations(self): @@ -381,11 +422,7 @@ def describe_raw_realms(self): there is an error requesting data from the warehouse. """ _validator._assert_runtime_context(self.__in_runtime_context) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_raw(), - ('id', 'label'), - 'id', - ) + return self.__raw_descriptor._get_data_frame('realms') def describe_raw_fields(self, realm): """Get a data frame describing the raw data fields for the given realm. @@ -412,14 +449,14 @@ def describe_raw_fields(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_raw_realm_id(self.__descriptors, realm) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_raw()[realm_id]['fields'], - ('id', 'label', 'description'), - 'id', - ) + return self.__raw_descriptor._get_data_frame('fields', realm) def get_resources(self, service_provider=None): """Get a dictionary containing information about the configured @@ -445,47 +482,3 @@ def get_resources(self, service_provider=None): """ _validator._assert_runtime_context(self.__in_runtime_context) return self.__http_requester._request_resources(service_provider) - - def _get_metric_label(self, realm, metric_id): - d = self.__descriptors._get_aggregate() - return d[realm]['metrics'][metric_id]['label'] - - def _get_dimension_label(self, realm, dimension_id): - if dimension_id == 'none': - return None - d = self.__descriptors._get_aggregate() - return d[realm]['dimensions'][dimension_id]['label'] - - def __get_data_frame(self, data, column_data, index=None): - result = pd.DataFrame( - data=data, - columns=pd.Series( - data=column_data, - dtype='string', - ), - dtype='string', - ).fillna(value=np.nan) - if index: - result = result.set_index(index) - return result - - def __get_data_frame_from_descriptor( - self, - descriptor, - columns, - index=None, - ): - data = [ - [id_] + [descriptor[id_][column] for column in columns[1:]] - for id_ in descriptor - ] - return self.__get_data_frame(data, columns, index) - - def __describe_metrics_or_dimensions(self, realm, m_or_d): - _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_realm_id(self.__descriptors, realm) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_aggregate()[realm_id][m_or_d], - ('id', 'label', 'description'), - 'id', - )