From 8c8a1520a7632a2651af2c46dc8a055782620202 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 17:47:29 -0400 Subject: [PATCH 01/11] Add support for deprecated names, raise warning instead of error for unknown filter values, refactor. --- xdmod_data/_descriptors.py | 80 +++++++++++----- xdmod_data/_http_requester.py | 11 +++ xdmod_data/_response_processor.py | 12 ++- xdmod_data/_utilities.py | 59 ++++++++++++ xdmod_data/_validator.py | 140 +++++++++++----------------- xdmod_data/warehouse.py | 149 +++++++++++++++--------------- 6 files changed, 260 insertions(+), 191 deletions(-) create mode 100644 xdmod_data/_utilities.py diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 5826737e..90d66131 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -1,21 +1,52 @@ -class _Descriptors: +import pandas as pd +import warnings +import xdmod_data._utilities as _utilities + +class _Descriptor: def __init__(self, http_requester): self.__http_requester = http_requester - self.__aggregate = None - self.__raw = None + self.__cached = None + + def _get_data_frame(self, data_type, realm=None): + if self.__cached is None: + self.__cached = self._request(self.__http_requester) + if realm is not None: + realm_id = self._get_data_id('realms', realm) + descriptor = self.__cached + if data_type != 'realms': + descriptor = descriptor[realm_id][data_type] + data_frame = pd.DataFrame.from_dict( + descriptor, + orient='index', + dtype='string', + ) + if data_type == 'realms': + data_frame = data_frame['label'].to_frame() + data_frame = data_frame.rename_axis('id') + return data_frame - def _get_aggregate(self): - if self.__aggregate is None: - self.__aggregate = self.__request_aggregate() - return self.__aggregate + def _get_data_id(self, data_type, value, realm=None): + data_type_label = data_type.rstrip('s') + if isinstance(self, _RawDescriptor): + data_type_label = f'raw {data_type_label}' + data_frame = self._get_data_frame(data_type, realm) + return _utilities._get_id_from_data_frame( + value, + data_frame, + data_type_label, + realm, + ) + + def _get_label_from_id(self, data_type, data_id, realm=None): + if data_type == 'dimensions' and data_id == 'none': + return None + data_frame = self._get_data_frame(data_type, realm) + return data_frame.loc[data_id, 'label'] - def _get_raw(self): - if self.__raw is None: - self.__raw = self.__request_raw() - return self.__raw - def __request_aggregate(self): - response = self.__http_requester._request_json( +class _AggregateDescriptor(_Descriptor): + def _request(self, http_requester): + response = http_requester._request_json( '/controllers/metric_explorer.php', {'operation': 'get_dw_descripter'}, ) @@ -23,15 +54,7 @@ def __request_aggregate(self): raise RuntimeError( 'Descriptor received with unexpected structure.', ) - return self.__deserialize_aggregate(response['data'][0]['realms']) - - def __request_raw(self): - response = self.__http_requester._request_json( - '/rest/v1/warehouse/export/realms', - ) - return self.__deserialize_raw(response['data']) - - def __deserialize_aggregate(self, serialized_descriptor): + serialized_descriptor = response['data'][0]['realms'] result = {} for realm in serialized_descriptor: result[realm] = {'label': serialized_descriptor[realm]['category']} @@ -45,7 +68,13 @@ def __deserialize_aggregate(self, serialized_descriptor): } return result - def __deserialize_raw(self, serialized_descriptor): + +class _RawDescriptor(_Descriptor): + def _request(self, http_requester): + response = http_requester._request_json( + '/rest/v1/warehouse/export/realms', + ) + serialized_descriptor = response['data'] result = {} for realm in serialized_descriptor: realm_id = realm['id'] @@ -53,8 +82,11 @@ def __deserialize_raw(self, serialized_descriptor): result[realm_id]['fields'] = {} fields = realm['fields'] for field in fields: - result[realm_id]['fields'][field['alias']] = { + r = { 'label': field['display'], 'description': field['documentation'], } + if 'deprecatedNames' in field: + r['deprecated_names'] = field['deprecatedNames'] + result[realm_id]['fields'][field['alias']] = r return result diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index ea76a416..f42531f6 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -2,6 +2,7 @@ import os import re import requests +from types import SimpleNamespace from urllib.parse import urlencode import xdmod_data._validator as _validator from xdmod_data.__version__ import __title__, __version__ @@ -30,6 +31,16 @@ def _tear_down(self): self.__in_runtime_context = False def _request_data(self, params): + # If any of the filter values are empty lists, don't bother getting the + # data. + if ( + 'filters' in params + and any( + not filter_values + for filter_values in params['filters'].values() + ) + ): + return SimpleNamespace(text='') return self.__request( path='/controllers/user_interface.php', post_fields=self.__get_data_post_fields(params), diff --git a/xdmod_data/_response_processor.py b/xdmod_data/_response_processor.py index e015634f..cc5ad5e0 100644 --- a/xdmod_data/_response_processor.py +++ b/xdmod_data/_response_processor.py @@ -6,14 +6,16 @@ import re -def _process_get_data_response(dw, params, response): - params['metric'] = dw._get_metric_label( - params['realm'], +def _process_get_data_response(aggregate_descriptor, params, response): + params['metric'] = aggregate_descriptor._get_label_from_id( + 'metrics', params['metric'], - ) - params['dimension'] = dw._get_dimension_label( params['realm'], + ) + params['dimension'] = aggregate_descriptor._get_label_from_id( + 'dimensions', params['dimension'], + params['realm'], ) csv_data = csv.reader(response.splitlines()) if params['dataset_type'] == 'timeseries': diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py new file mode 100644 index 00000000..58a3ee1a --- /dev/null +++ b/xdmod_data/_utilities.py @@ -0,0 +1,59 @@ +import pandas as pd +import warnings + + +def _get_id_from_data_frame( + value, + data_frame, + data_type_label, + realm=None, +): + mask = ( + (data_frame.index == value) + | (data_frame['label'] == value) + ) + deprecated_names_mask = pd.Series(False, index=data_frame.index) + if 'deprecated_names' in data_frame.columns: + deprecated_names_mask = data_frame['deprecated_names'].apply( + lambda deprecated_names: ( + not pd.isna(deprecated_names) + and value in deprecated_names + ) + ) + mask |= deprecated_names_mask + matches = data_frame.index[mask] + if matches.empty: + return None + data_id = matches[0] + __warn_if_deprecated_name( + value, + data_frame, + data_type_label, + data_id, + deprecated_names_mask, + realm, + ) + return data_id + + +def __warn_if_deprecated_name( + name, + data_frame, + data_type_label, + data_id, + deprecated_names_mask, + realm, +): + label = data_frame.loc[data_id, 'label'] + alternative = label if data_id == label else f'{data_id}" or "{label}' + realm_text = f' in the "{realm}" realm' if realm is not None else '' + if deprecated_names_mask.any(): + warnings.warn( + ( + f'The {data_type_label} name "{name}"{realm_text} is' + f' deprecated and will be removed in a future version of' + f' XDMoD. Use "{alternative}" instead.' + ), + FutureWarning, + stacklevel=7, + ) diff --git a/xdmod_data/_validator.py b/xdmod_data/_validator.py index 646aa7f1..47b5364b 100644 --- a/xdmod_data/_validator.py +++ b/xdmod_data/_validator.py @@ -1,4 +1,6 @@ from datetime import date, timedelta +import xdmod_data._utilities as _utilities +import warnings def _assert_str(name, value): @@ -14,25 +16,28 @@ def _assert_runtime_context(in_runtime_context): ) -def _validate_get_data_params(data_warehouse, descriptors, params): +def _validate_get_data_params(data_warehouse, aggregate_descriptor, params): results = {} (results['start_date'], results['end_date']) = ( __validate_duration(params['duration']) ) - results['realm'] = _find_realm_id(descriptors, params['realm']) - results['metric'] = __find_metric_id( - descriptors, - results['realm'], - params['metric'], + results['realm'] = aggregate_descriptor._get_data_id( + 'realms', + params['realm'], ) - results['dimension'] = _find_dimension_id( - descriptors, + results['metric'] = aggregate_descriptor._get_data_id( + 'metrics', + params['metric'], results['realm'], + ) + results['dimension'] = aggregate_descriptor._get_data_id( + 'dimensions', params['dimension'], + results['realm'], ) results['filters'] = __validate_filters( data_warehouse, - descriptors, + aggregate_descriptor, results['realm'], params['filters'], ) @@ -49,21 +54,26 @@ def _validate_get_data_params(data_warehouse, descriptors, params): return results -def _validate_get_raw_data_params(data_warehouse, descriptors, params): +def _validate_get_raw_data_params( + data_warehouse, + aggregate_descriptor, + raw_descriptor, + params, +): results = {} (results['start_date'], results['end_date']) = ( __validate_duration(params['duration']) ) - results['realm'] = _find_raw_realm_id(descriptors, params['realm']) + results['realm'] = raw_descriptor._get_data_id('realms', params['realm']) results['fields'] = __validate_raw_fields( - data_warehouse, - params['realm'], + raw_descriptor, + results['realm'], params['fields'], ) results['filters'] = __validate_filters( data_warehouse, - descriptors, - params['realm'], + aggregate_descriptor, + results['realm'], params['filters'], ) results['show_progress'] = __assert_bool( @@ -73,23 +83,6 @@ def _validate_get_raw_data_params(data_warehouse, descriptors, params): return results -def _find_realm_id(descriptors, realm): - return __find_id_in_descriptor( - descriptors._get_aggregate(), - 'realm', - realm, - ) - - -def _find_dimension_id(descriptors, realm, dimension): - return __find_metric_or_dimension_id( - descriptors, - realm, - 'dimension', - dimension, - ) - - def _get_durations(): this_year = date.today().year six_years_ago = this_year - 6 @@ -128,14 +121,6 @@ def _get_aggregation_units(): ) -def _find_raw_realm_id(descriptors, realm): - return __find_id_in_descriptor( - descriptors._get_raw(), - 'realm', - realm, - ) - - def __assert_type(name, value, type_, type_name): if not isinstance(value, type_): raise TypeError('`' + name + '` must be a ' + type_name + '.') @@ -161,20 +146,15 @@ def __validate_duration(duration): return (start_date, end_date) -def __find_metric_id(descriptors, realm, metric): - return __find_metric_or_dimension_id( - descriptors, - realm, - 'metric', - metric, - ) - - -def __validate_filters(data_warehouse, descriptors, realm, filters): +def __validate_filters(data_warehouse, aggregate_descriptor, realm, filters): try: result = {} for dimension in filters: - dimension_id = _find_dimension_id(descriptors, realm, dimension) + dimension_id = aggregate_descriptor._get_data_id( + 'dimensions', + dimension, + realm, + ) filter_values = filters[dimension] if isinstance(filter_values, str): filter_values = [filter_values] @@ -184,18 +164,30 @@ def __validate_filters(data_warehouse, descriptors, realm, filters): dimension, ) for filter_value in filter_values: - new_filter_value = __find_value_in_df( - 'Filter value', - valid_filter_values, + new_filter_value = _utilities._get_id_from_data_frame( filter_value, + valid_filter_values, + 'filter value', + realm, ) - result[dimension_id].append(new_filter_value) + if new_filter_value is None: + warnings.warn( + ( + f'The filter value "{filter_value}" was not found' + f' for the "{dimension}" dimension in the' + f' "{realm}" realm.' + ), + UserWarning, + stacklevel=4, + ) + else: + result[dimension_id].append(new_filter_value) return result except TypeError: raise TypeError( '`filters` must be a mapping whose keys are strings and whose' + ' values are strings or sequences of strings.', - ) from None + ) def __assert_bool(name, value): @@ -215,13 +207,14 @@ def __find_str_in_sequence(value, sequence, label): ) from None -def __validate_raw_fields(data_warehouse, realm, fields): +def __validate_raw_fields(raw_descriptor, realm, fields): try: results = [] - valid_raw_fields = data_warehouse.describe_raw_fields(realm) for field in fields: - new_field = __find_value_in_df('Field', valid_raw_fields, field) - results.append(new_field) + field_id = raw_descriptor._get_data_id('fields', field, realm) + if field_id is None: + raise KeyError(f'Raw field "{field}" not found.') from None + results.append(field_id) return results except TypeError: raise TypeError( @@ -229,24 +222,6 @@ def __validate_raw_fields(data_warehouse, realm, fields): ) from None -def __find_id_in_descriptor(descriptor, name, value): - _assert_str(name, value) - for id_ in descriptor: - if id_ == value or descriptor[id_]['label'] == value: - return id_ - raise KeyError( - name.capitalize() + " '" + value + "' not found.", - ) - - -def __find_metric_or_dimension_id(descriptors, realm, m_or_d, value): - return __find_id_in_descriptor( - descriptors._get_aggregate()[realm][m_or_d + 's'], - m_or_d, - value, - ) - - def __get_dates_from_duration(duration): today = date.today() yesterday = today + timedelta(days=-1) @@ -315,15 +290,6 @@ def __get_dates_from_duration(duration): return durations_to_dates[duration] -def __find_value_in_df(label, df, value): - if value in df.index: - return value - elif value in df['label'].values: - return df.index[df['label'] == value].tolist()[0] - else: - raise KeyError(label + " '" + value + "' not found.") - - def __lowercase_and_remove_spaces(value): return value.lower().replace(' ', '') diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index ba016f76..dbd03ff1 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -1,7 +1,7 @@ import numpy as np import os import pandas as pd -from xdmod_data._descriptors import _Descriptors +from xdmod_data._descriptors import _AggregateDescriptor, _RawDescriptor from xdmod_data._http_requester import _HttpRequester import xdmod_data._response_processor as _response_processor import xdmod_data._validator as _validator @@ -51,7 +51,10 @@ def __init__(self, xdmod_host=None): + ' variable must be set.', ) from None self.__http_requester = _HttpRequester(xdmod_host) - self.__descriptors = _Descriptors(self.__http_requester) + self.__aggregate_descriptor = _AggregateDescriptor( + self.__http_requester, + ) + self.__raw_descriptor = _RawDescriptor(self.__http_requester) def __enter__(self): self.__in_runtime_context = True @@ -115,7 +118,9 @@ def get_data( filters : mapping, optional A mapping of dimensions to their possible values. Results will only be included whose values for each of the given dimensions - match one of the corresponding given values. + match one of the corresponding given values. If any of the + provided lists of values are empty, then an empty Pandas Series + will be returned. dataset_type : str, optional Either 'timeseries' or 'aggregate'. aggregation_unit : str, optional @@ -143,16 +148,24 @@ def get_data( If any of the arguments are of the wrong type. ValueError If `duration` is an object but not of length 2. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`, `metric`, + `dimension`, and/or one of the keys of `filters`. + UserWarning + If a value in `filters` is provided that is not found. """ _validator._assert_runtime_context(self.__in_runtime_context) params = _validator._validate_get_data_params( self, - self.__descriptors, + self.__aggregate_descriptor, locals(), ) response = self.__http_requester._request_data(params) return _response_processor._process_get_data_response( - self, + self.__aggregate_descriptor, params, response.text, ) @@ -183,7 +196,9 @@ def get_raw_data( filters : mapping, optional A mapping of dimensions to their possible values. Results will only be included whose values for each of the given dimensions - match one of the corresponding given values. + match one of the corresponding given values. If any of the + provided lists of values are empty, then an empty Pandas Series + will be returned. show_progress : bool, optional If true, periodically print how many rows have been gotten so far. @@ -212,15 +227,29 @@ def get_raw_data( If any of the arguments are of the wrong type. ValueError If `duration` is an object but not of length 2. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`, `fields`, and/or + one of the keys of `filters`. + UserWarning + If a value in `filters` is provided that is not found. """ _validator._assert_runtime_context(self.__in_runtime_context) params = _validator._validate_get_raw_data_params( self, - self.__descriptors, + self.__aggregate_descriptor, + self.__raw_descriptor, locals(), ) (data, column_data) = self.__http_requester._request_raw_data(params) - return self.__get_data_frame(data, column_data) + result = pd.DataFrame( + data, + columns=pd.Series(column_data, dtype='string'), + dtype='string', + ) + return result def describe_realms(self): """Get a data frame describing the valid realms in the data warehouse. @@ -237,11 +266,7 @@ def describe_realms(self): there is an error requesting data from the warehouse. """ _validator._assert_runtime_context(self.__in_runtime_context) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_aggregate(), - ('id', 'label'), - 'id', - ) + return self.__aggregate_descriptor._get_data_frame('realms') def describe_metrics(self, realm): """Get a data frame describing the valid metrics for the given realm. @@ -267,8 +292,14 @@ def describe_metrics(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ - return self.__describe_metrics_or_dimensions(realm, 'metrics') + _validator._assert_runtime_context(self.__in_runtime_context) + return self.__aggregate_descriptor._get_data_frame('metrics', realm) def describe_dimensions(self, realm): """Get a data frame describing the valid dimensions for the given @@ -295,8 +326,14 @@ def describe_dimensions(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ - return self.__describe_metrics_or_dimensions(realm, 'dimensions') + _validator._assert_runtime_context(self.__in_runtime_context) + return self.__aggregate_descriptor._get_data_frame('dimensions', realm) def get_filter_values(self, realm, dimension): """Get a data frame containing the valid filter values for the given @@ -328,20 +365,28 @@ def get_filter_values(self, realm, dimension): there is an error requesting data from the warehouse. TypeError If `realm` or `dimension` are not strings. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm` or `dimension`. """ _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_realm_id(self.__descriptors, realm) - dimension_id = _validator._find_dimension_id( - self.__descriptors, - realm_id, + realm_id = self.__aggregate_descriptor._get_data_id('realms', realm) + dimension_id = self.__aggregate_descriptor._get_data_id( + 'dimensions', dimension, + realm_id, ) response_data = self.__http_requester._request_filter_values( realm_id, dimension_id, ) - data = [(datum['id'], datum['name']) for datum in response_data] - result = self.__get_data_frame(data, ('id', 'label'), 'id') + result = pd.DataFrame( + data=[(datum['id'], datum['name']) for datum in response_data], + columns=pd.Series(['id', 'label'], dtype='string'), + dtype='string', + ).set_index('id') return result def get_durations(self): @@ -381,11 +426,7 @@ def describe_raw_realms(self): there is an error requesting data from the warehouse. """ _validator._assert_runtime_context(self.__in_runtime_context) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_raw(), - ('id', 'label'), - 'id', - ) + return self.__raw_descriptor._get_data_frame('realms') def describe_raw_fields(self, realm): """Get a data frame describing the raw data fields for the given realm. @@ -412,13 +453,15 @@ def describe_raw_fields(self, realm): there is an error requesting data from the warehouse. TypeError If `realm` is not a string. + + Warns + ----- + FutureWarning + If a deprecated value is provided for `realm`. """ _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_raw_realm_id(self.__descriptors, realm) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_raw()[realm_id]['fields'], - ('id', 'label', 'description'), - 'id', + return self.__raw_descriptor._get_data_frame('fields', realm).drop( + columns='deprecated_names', ) def get_resources(self, service_provider=None): @@ -445,47 +488,3 @@ def get_resources(self, service_provider=None): """ _validator._assert_runtime_context(self.__in_runtime_context) return self.__http_requester._request_resources(service_provider) - - def _get_metric_label(self, realm, metric_id): - d = self.__descriptors._get_aggregate() - return d[realm]['metrics'][metric_id]['label'] - - def _get_dimension_label(self, realm, dimension_id): - if dimension_id == 'none': - return None - d = self.__descriptors._get_aggregate() - return d[realm]['dimensions'][dimension_id]['label'] - - def __get_data_frame(self, data, column_data, index=None): - result = pd.DataFrame( - data=data, - columns=pd.Series( - data=column_data, - dtype='string', - ), - dtype='string', - ).fillna(value=np.nan) - if index: - result = result.set_index(index) - return result - - def __get_data_frame_from_descriptor( - self, - descriptor, - columns, - index=None, - ): - data = [ - [id_] + [descriptor[id_][column] for column in columns[1:]] - for id_ in descriptor - ] - return self.__get_data_frame(data, columns, index) - - def __describe_metrics_or_dimensions(self, realm, m_or_d): - _validator._assert_runtime_context(self.__in_runtime_context) - realm_id = _validator._find_realm_id(self.__descriptors, realm) - return self.__get_data_frame_from_descriptor( - self.__descriptors._get_aggregate()[realm_id][m_or_d], - ('id', 'label', 'description'), - 'id', - ) From 8b337e87322687a0de1bccb41a38d6be1b1381e7 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 18:00:01 -0400 Subject: [PATCH 02/11] Update. --- xdmod_data/_http_requester.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/xdmod_data/_http_requester.py b/xdmod_data/_http_requester.py index f42531f6..ea76a416 100644 --- a/xdmod_data/_http_requester.py +++ b/xdmod_data/_http_requester.py @@ -2,7 +2,6 @@ import os import re import requests -from types import SimpleNamespace from urllib.parse import urlencode import xdmod_data._validator as _validator from xdmod_data.__version__ import __title__, __version__ @@ -31,16 +30,6 @@ def _tear_down(self): self.__in_runtime_context = False def _request_data(self, params): - # If any of the filter values are empty lists, don't bother getting the - # data. - if ( - 'filters' in params - and any( - not filter_values - for filter_values in params['filters'].values() - ) - ): - return SimpleNamespace(text='') return self.__request( path='/controllers/user_interface.php', post_fields=self.__get_data_post_fields(params), From 60ffa3870a03f77d51a08cebd922deec35fd1807 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 18:17:05 -0400 Subject: [PATCH 03/11] Update. --- xdmod_data/_descriptors.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 90d66131..4fa86fe8 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -1,6 +1,7 @@ import pandas as pd import warnings import xdmod_data._utilities as _utilities +import xdmod_data._validator as _validator class _Descriptor: def __init__(self, http_requester): @@ -27,15 +28,25 @@ def _get_data_frame(self, data_type, realm=None): def _get_data_id(self, data_type, value, realm=None): data_type_label = data_type.rstrip('s') + _validator._assert_str(data_type_label, value) if isinstance(self, _RawDescriptor): data_type_label = f'raw {data_type_label}' data_frame = self._get_data_frame(data_type, realm) - return _utilities._get_id_from_data_frame( + data_id = _utilities._get_id_from_data_frame( value, data_frame, data_type_label, realm, ) + if data_id is None: + realm_text = ( + f' in the "{realm}" realm' if realm is not None else '' + ) + raise KeyError( + f'Value for "{data_type_label}" is unknown{realm_text}:' + f' "{value}"', + ) from None + return data_id def _get_label_from_id(self, data_type, data_id, realm=None): if data_type == 'dimensions' and data_id == 'none': From 474bea34e4d4396b657b1998ce8f869e9e346524 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 18:25:16 -0400 Subject: [PATCH 04/11] Update. --- xdmod_data/_descriptors.py | 2 +- xdmod_data/_validator.py | 2 +- xdmod_data/warehouse.py | 9 +++------ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 4fa86fe8..653952a2 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -43,7 +43,7 @@ def _get_data_id(self, data_type, value, realm=None): f' in the "{realm}" realm' if realm is not None else '' ) raise KeyError( - f'Value for "{data_type_label}" is unknown{realm_text}:' + f'Value for `{data_type_label}` is unknown{realm_text}:' f' "{value}"', ) from None return data_id diff --git a/xdmod_data/_validator.py b/xdmod_data/_validator.py index 47b5364b..3a5d3557 100644 --- a/xdmod_data/_validator.py +++ b/xdmod_data/_validator.py @@ -187,7 +187,7 @@ def __validate_filters(data_warehouse, aggregate_descriptor, realm, filters): raise TypeError( '`filters` must be a mapping whose keys are strings and whose' + ' values are strings or sequences of strings.', - ) + ) from None def __assert_bool(name, value): diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index dbd03ff1..cd206ee2 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -118,9 +118,7 @@ def get_data( filters : mapping, optional A mapping of dimensions to their possible values. Results will only be included whose values for each of the given dimensions - match one of the corresponding given values. If any of the - provided lists of values are empty, then an empty Pandas Series - will be returned. + match one of the corresponding given values. dataset_type : str, optional Either 'timeseries' or 'aggregate'. aggregation_unit : str, optional @@ -196,9 +194,7 @@ def get_raw_data( filters : mapping, optional A mapping of dimensions to their possible values. Results will only be included whose values for each of the given dimensions - match one of the corresponding given values. If any of the - provided lists of values are empty, then an empty Pandas Series - will be returned. + match one of the corresponding given values. show_progress : bool, optional If true, periodically print how many rows have been gotten so far. @@ -462,6 +458,7 @@ def describe_raw_fields(self, realm): _validator._assert_runtime_context(self.__in_runtime_context) return self.__raw_descriptor._get_data_frame('fields', realm).drop( columns='deprecated_names', + errors='ignore', ) def get_resources(self, service_provider=None): From d7b98798569b3a01ff9cbca670629bcd0b19f274 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 18:34:53 -0400 Subject: [PATCH 05/11] Update. --- xdmod_data/_descriptors.py | 18 ++++++++++++++++-- xdmod_data/warehouse.py | 7 ++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 653952a2..85b5e316 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -8,7 +8,12 @@ def __init__(self, http_requester): self.__http_requester = http_requester self.__cached = None - def _get_data_frame(self, data_type, realm=None): + def _get_data_frame( + self, + data_type, + realm=None, + drop_deprecated_names_column=True, + ): if self.__cached is None: self.__cached = self._request(self.__http_requester) if realm is not None: @@ -24,6 +29,11 @@ def _get_data_frame(self, data_type, realm=None): if data_type == 'realms': data_frame = data_frame['label'].to_frame() data_frame = data_frame.rename_axis('id') + if drop_deprecated_names_column: + data_frame = data_frame.drop( + columns='deprecated_names', + errors='ignore', + ) return data_frame def _get_data_id(self, data_type, value, realm=None): @@ -31,7 +41,11 @@ def _get_data_id(self, data_type, value, realm=None): _validator._assert_str(data_type_label, value) if isinstance(self, _RawDescriptor): data_type_label = f'raw {data_type_label}' - data_frame = self._get_data_frame(data_type, realm) + data_frame = self._get_data_frame( + data_type, + realm, + drop_deprecated_names_column=False, + ) data_id = _utilities._get_id_from_data_frame( value, data_frame, diff --git a/xdmod_data/warehouse.py b/xdmod_data/warehouse.py index cd206ee2..896d263b 100644 --- a/xdmod_data/warehouse.py +++ b/xdmod_data/warehouse.py @@ -244,7 +244,7 @@ def get_raw_data( data, columns=pd.Series(column_data, dtype='string'), dtype='string', - ) + ).fillna(value=np.nan) return result def describe_realms(self): @@ -456,10 +456,7 @@ def describe_raw_fields(self, realm): If a deprecated value is provided for `realm`. """ _validator._assert_runtime_context(self.__in_runtime_context) - return self.__raw_descriptor._get_data_frame('fields', realm).drop( - columns='deprecated_names', - errors='ignore', - ) + return self.__raw_descriptor._get_data_frame('fields', realm) def get_resources(self, service_provider=None): """Get a dictionary containing information about the configured From f717d465f9191026182a09b89a4c08c2d9c13e26 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 18:38:49 -0400 Subject: [PATCH 06/11] Update. --- tests/ci/scripts/run-tests.sh | 2 +- xdmod_data/_descriptors.py | 2 +- xdmod_data/_response_processor.py | 14 ++++---------- xdmod_data/_utilities.py | 2 +- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/ci/scripts/run-tests.sh b/tests/ci/scripts/run-tests.sh index 4835fb39..c2795429 100755 --- a/tests/ci/scripts/run-tests.sh +++ b/tests/ci/scripts/run-tests.sh @@ -5,7 +5,7 @@ set -exo pipefail export MIN_PYTHON_VERSION=3.8 -export MAX_PYTHON_VERSION=3.13 +export MAX_PYTHON_VERSION=3.14 export XDMOD_11_0_IMAGE=tools-ext-01.ccr.xdmod.org/xdmod:x86_64-rockylinux8.9.20231119-v11.0.0-1.0-03 BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 85b5e316..7874cb06 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -1,8 +1,8 @@ import pandas as pd -import warnings import xdmod_data._utilities as _utilities import xdmod_data._validator as _validator + class _Descriptor: def __init__(self, http_requester): self.__http_requester = http_requester diff --git a/xdmod_data/_response_processor.py b/xdmod_data/_response_processor.py index cc5ad5e0..0faeeab0 100644 --- a/xdmod_data/_response_processor.py +++ b/xdmod_data/_response_processor.py @@ -19,12 +19,12 @@ def _process_get_data_response(aggregate_descriptor, params, response): ) csv_data = csv.reader(response.splitlines()) if params['dataset_type'] == 'timeseries': - return __parse_timeseries_csv_data(dw, params, csv_data) + return __parse_timeseries_csv_data(params, csv_data) else: return __parse_aggregate_csv_data(params, csv_data) -def __parse_timeseries_csv_data(dw, params, csv_data): +def __parse_timeseries_csv_data(params, csv_data): time_values = [] data = [] for line_num, line in enumerate(csv_data): @@ -34,7 +34,6 @@ def __parse_timeseries_csv_data(dw, params, csv_data): time_values.append(__parse_timeseries_date_string(line[0])) data.append(np.asarray(line[1:])) return __get_timeseries_data_frame( - dw, params, data, time_values, @@ -82,7 +81,6 @@ def __parse_timeseries_date_string(date_string): def __get_timeseries_data_frame( - dw, params, data, time_values, @@ -95,11 +93,7 @@ def __get_timeseries_data_frame( dtype='datetime64[ns]', name='Time', ), - columns=__get_timeseries_data_frame_columns( - dw, - params, - dimension_values, - ), + columns=__get_timeseries_data_frame_columns(params, dimension_values), dtype='Float64', ).fillna(value=np.nan) @@ -143,7 +137,7 @@ def __parse_quarter_date_string(date_string): return (date_string, format_) -def __get_timeseries_data_frame_columns(dw, params, dimension_values): +def __get_timeseries_data_frame_columns(params, dimension_values): if params['dimension'] is None: columns = pd.Series( data=params['metric'], diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py index 58a3ee1a..c57f568e 100644 --- a/xdmod_data/_utilities.py +++ b/xdmod_data/_utilities.py @@ -18,7 +18,7 @@ def _get_id_from_data_frame( lambda deprecated_names: ( not pd.isna(deprecated_names) and value in deprecated_names - ) + ), ) mask |= deprecated_names_mask matches = data_frame.index[mask] From 6a3e89da48f25bf88eb426bcc7d6e6c9104be8ae Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Thu, 16 Apr 2026 21:53:23 -0400 Subject: [PATCH 07/11] Update. --- .../test_datawarehouse_integration.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_datawarehouse_integration.py b/tests/integration/test_datawarehouse_integration.py index 41990bae..f59ec3e0 100644 --- a/tests/integration/test_datawarehouse_integration.py +++ b/tests/integration/test_datawarehouse_integration.py @@ -52,13 +52,15 @@ } KEY_ERROR_TEST_VALUES_AND_MATCHES = { 'duration': (INVALID_STR, 'Invalid value for `duration`'), - 'realm': (INVALID_STR, r'Realm .* not found'), - 'metric': (INVALID_STR, r'Metric .* not found'), - 'dimension': (INVALID_STR, r'Dimension .* not found'), - 'filter_key': ({INVALID_STR: INVALID_STR}, r'Dimension .* not found'), - 'filter_value': ( - {VALID_DIMENSION: INVALID_STR}, - r'Filter value .* not found', + 'realm': (INVALID_STR, f'Value for `realm` is unknown: "{INVALID_STR}"'), + 'metric': (INVALID_STR, f'Value for `metric` is unknown: "{INVALID_STR}"'), + 'dimension': ( + INVALID_STR, + f'Value for `dimension` is unknown: "{INVALID_STR}"', + ), + 'filter_key': ( + {INVALID_STR: INVALID_STR}, + f'Value for `dimension` is unknown: "{INVALID_STR}"', ), 'dataset_type': (INVALID_STR, 'Invalid value for `dataset_type`'), 'aggregation_unit': (INVALID_STR, 'Invalid value for `aggregation_unit`'), From 48c4e3d95fe7ea56dc683ce9aa597ce4ca3e7801 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Fri, 17 Apr 2026 14:06:52 -0400 Subject: [PATCH 08/11] Update. --- .../test_datawarehouse_integration.py | 90 +++++++++++++++---- ...w-data-every-1000-no-fields-no-filters.csv | 2 +- .../test_datawarehouse_regression.py | 2 +- xdmod_data/_descriptors.py | 15 ++-- xdmod_data/_utilities.py | 10 ++- xdmod_data/_validator.py | 15 ++-- 6 files changed, 95 insertions(+), 39 deletions(-) diff --git a/tests/integration/test_datawarehouse_integration.py b/tests/integration/test_datawarehouse_integration.py index f59ec3e0..7b347257 100644 --- a/tests/integration/test_datawarehouse_integration.py +++ b/tests/integration/test_datawarehouse_integration.py @@ -50,26 +50,38 @@ 'show_progress': False, 'service_provider': 'screw', } + + +def __get_key_error_test_match(param): + realm_text = '' + if param in ['metric', 'dimension']: + realm_text = f' in the \'{VALID_VALUES["realm"]}\' realm' + return f"Value for `{param}` not found{realm_text}: '{INVALID_STR}'" + + KEY_ERROR_TEST_VALUES_AND_MATCHES = { - 'duration': (INVALID_STR, 'Invalid value for `duration`'), - 'realm': (INVALID_STR, f'Value for `realm` is unknown: "{INVALID_STR}"'), - 'metric': (INVALID_STR, f'Value for `metric` is unknown: "{INVALID_STR}"'), - 'dimension': ( - INVALID_STR, - f'Value for `dimension` is unknown: "{INVALID_STR}"', - ), - 'filter_key': ( + 'filters:key': [ {INVALID_STR: INVALID_STR}, - f'Value for `dimension` is unknown: "{INVALID_STR}"', - ), - 'dataset_type': (INVALID_STR, 'Invalid value for `dataset_type`'), - 'aggregation_unit': (INVALID_STR, 'Invalid value for `aggregation_unit`'), - 'parameter': ( + __get_key_error_test_match('dimension'), + ], + 'parameter': [ INVALID_STR, 'Parameter .* does not have a list of valid values', - ), - 'field': (INVALID_STR, r'Field .* not found'), + ], } +for param in [ + 'duration', + 'realm', + 'metric', + 'dimension', + 'dataset_type', + 'aggregation_unit', + 'field', +]: + KEY_ERROR_TEST_VALUES_AND_MATCHES[param] = [ + INVALID_STR, + __get_key_error_test_match(param), + ] key_error_test_ids = [] duration_test_ids = [] @@ -112,10 +124,9 @@ ] value_error_test_methods += [method] if 'filters' in METHOD_PARAMS[method]: - for param in ('filter_key', 'filter_value'): - key_error_test_ids += [method + ':' + param] - (value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES[param] - key_error_test_params += [(method, {'filters': value}, match)] + key_error_test_ids += [method + ':filters:key'] + (value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES['filters:key'] + key_error_test_params += [(method, {'filters': value}, match)] load_dotenv(Path(os.path.expanduser(TOKEN_PATH)), override=True) @@ -491,3 +502,44 @@ def test_get_resources_invalid_service_provider(dw_methods): # get_resources is not supported in XDMoD < 11.0.2. if XDMOD_VERSION != 'xdmod-11-0': assert result == [] + + +def test_deprecated_raw_field(dw_methods): + with pytest.warns( + FutureWarning, + match=( + "The field name 'Organization' in the 'Jobs' realm is deprecated" + ' and will be removed in a future version of XDMoD. Use' + " 'User Institution' instead." + ), + ): + __run_method( + dw_methods, + 'get_raw_data', + { + 'duration': ['0000-01-01', '0000-01-01'], + 'realm': 'Jobs', + 'fields': ['Organization'], + }, + ) + + +filters_methods = [ + method for method, params in METHOD_PARAMS.items() if 'filters' in params +] + + +@pytest.mark.parametrize('method', filters_methods) +def test_invalid_filter_value(dw_methods, method): + with pytest.warns( + UserWarning, + match=( + f"Filter value not found for the '{VALID_DIMENSION}' dimension in" + f" the '{VALID_VALUES['realm']}' realm: '{INVALID_STR}'" + ), + ): + __run_method( + dw_methods, + method, + {'filters': {VALID_DIMENSION: INVALID_STR}}, + ) diff --git a/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv b/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv index 42a56249..3c76b055 100644 --- a/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv +++ b/tests/regression/data/xdmod-11-0/raw-data-every-1000-no-fields-no-filters.csv @@ -1,4 +1,4 @@ -,Local Job Id,Resource,Timezone,System Username (Deidentified),User,Organization,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group +,Local Job Id,Resource,Timezone,System Username (Deidentified),User,User Institution,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group 0,6110386,Robertson,UTC,4003ebd9b2de239734970b4cf32cc3d6183d2c6b,"Harrier, Hen",Screwdriver,curry,1483118438,1483118438,1483118443,1483118438,1,1,0,-1,5,0,5,0,0:0,COMPLETED,1,48000Mn,172800,white,Computer and Information Science and Engineering,Computer and Computation Research,Computer and Computation Theory 1000,970339,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483064122,1483064124,1483064151,1483064122,1,8,0,-1,27,2,216,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology 2000,981731,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483102441,1483102444,1483102466,1483102441,1,8,0,-1,22,3,176,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology diff --git a/tests/regression/test_datawarehouse_regression.py b/tests/regression/test_datawarehouse_regression.py index 4c11352d..8bf84b35 100644 --- a/tests/regression/test_datawarehouse_regression.py +++ b/tests/regression/test_datawarehouse_regression.py @@ -240,7 +240,7 @@ def test_get_durations(valid_dw): @pytest.mark.parametrize( - 'service_provider', [[None], ['screw']], + 'service_provider', [[None], ['screw']], ids=['none', 'not-none'], ) def test_get_resources(valid_dw, service_provider): # get_resources is not supported in XDMoD < 11.0.2. diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index 7874cb06..da00bd7f 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -34,13 +34,13 @@ def _get_data_frame( columns='deprecated_names', errors='ignore', ) + data_frame.index = data_frame.index.astype('string') + data_frame.columns = data_frame.columns.astype('string') return data_frame def _get_data_id(self, data_type, value, realm=None): - data_type_label = data_type.rstrip('s') - _validator._assert_str(data_type_label, value) - if isinstance(self, _RawDescriptor): - data_type_label = f'raw {data_type_label}' + param_name = data_type.rstrip('s') + _validator._assert_str(param_name, value) data_frame = self._get_data_frame( data_type, realm, @@ -49,16 +49,15 @@ def _get_data_id(self, data_type, value, realm=None): data_id = _utilities._get_id_from_data_frame( value, data_frame, - data_type_label, + param_name, realm, ) if data_id is None: realm_text = ( - f' in the "{realm}" realm' if realm is not None else '' + f" in the '{realm}' realm" if realm is not None else '' ) raise KeyError( - f'Value for `{data_type_label}` is unknown{realm_text}:' - f' "{value}"', + f"Value for `{param_name}` not found{realm_text}: '{value}'", ) from None return data_id diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py index c57f568e..c8256ee9 100644 --- a/xdmod_data/_utilities.py +++ b/xdmod_data/_utilities.py @@ -45,14 +45,16 @@ def __warn_if_deprecated_name( realm, ): label = data_frame.loc[data_id, 'label'] - alternative = label if data_id == label else f'{data_id}" or "{label}' - realm_text = f' in the "{realm}" realm' if realm is not None else '' + alternative = label + if data_id != label: + alternative = f"{data_id}' or '{label}" + realm_text = f" in the '{realm}' realm" if realm is not None else '' if deprecated_names_mask.any(): warnings.warn( ( - f'The {data_type_label} name "{name}"{realm_text} is' + f"The {data_type_label} name '{name}'{realm_text} is" f' deprecated and will be removed in a future version of' - f' XDMoD. Use "{alternative}" instead.' + f" XDMoD. Use '{alternative}' instead." ), FutureWarning, stacklevel=7, diff --git a/xdmod_data/_validator.py b/xdmod_data/_validator.py index 3a5d3557..0dfeac0b 100644 --- a/xdmod_data/_validator.py +++ b/xdmod_data/_validator.py @@ -173,9 +173,9 @@ def __validate_filters(data_warehouse, aggregate_descriptor, realm, filters): if new_filter_value is None: warnings.warn( ( - f'The filter value "{filter_value}" was not found' - f' for the "{dimension}" dimension in the' - f' "{realm}" realm.' + f"Filter value not found for the '{dimension}'" + f" dimension in the '{realm}' realm:" + f' {filter_value!r}' ), UserWarning, stacklevel=4, @@ -201,9 +201,10 @@ def __find_str_in_sequence(value, sequence, label): transformed_valid_value = __lowercase_and_remove_spaces(valid_value) if transformed_valid_value == transformed_value: return valid_value + sequence_str = "', '".join(sequence) raise KeyError( - 'Invalid value for `' + label + "`: '" + value + "'" - + ". Valid values are: '" + "', '".join(sequence) + "'.", + f"Value for `{label}` not found: '{value}'. Valid values are:" + f" '{sequence_str}'.", ) from None @@ -213,7 +214,9 @@ def __validate_raw_fields(raw_descriptor, realm, fields): for field in fields: field_id = raw_descriptor._get_data_id('fields', field, realm) if field_id is None: - raise KeyError(f'Raw field "{field}" not found.') from None + raise KeyError( + f"Raw field not found in the {realm} realm: '{field}'.", + ) from None results.append(field_id) return results except TypeError: From 2228d4e0c50a2f444c64f5ab794aac5bc0be88a2 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Fri, 17 Apr 2026 17:26:16 -0400 Subject: [PATCH 09/11] Update. --- xdmod_data/_descriptors.py | 10 ++++++---- xdmod_data/_utilities.py | 35 ++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/xdmod_data/_descriptors.py b/xdmod_data/_descriptors.py index da00bd7f..a344ae22 100644 --- a/xdmod_data/_descriptors.py +++ b/xdmod_data/_descriptors.py @@ -12,7 +12,7 @@ def _get_data_frame( self, data_type, realm=None, - drop_deprecated_names_column=True, + drop_deprecation_columns=True, ): if self.__cached is None: self.__cached = self._request(self.__http_requester) @@ -29,9 +29,9 @@ def _get_data_frame( if data_type == 'realms': data_frame = data_frame['label'].to_frame() data_frame = data_frame.rename_axis('id') - if drop_deprecated_names_column: + if drop_deprecation_columns: data_frame = data_frame.drop( - columns='deprecated_names', + columns=['deprecated', 'deprecated_names'], errors='ignore', ) data_frame.index = data_frame.index.astype('string') @@ -44,7 +44,7 @@ def _get_data_id(self, data_type, value, realm=None): data_frame = self._get_data_frame( data_type, realm, - drop_deprecated_names_column=False, + drop_deprecation_columns=False, ) data_id = _utilities._get_id_from_data_frame( value, @@ -110,6 +110,8 @@ def _request(self, http_requester): 'label': field['display'], 'description': field['documentation'], } + if 'deprecated' in field: + r['deprecated'] = field['deprecated'] if 'deprecatedNames' in field: r['deprecated_names'] = field['deprecatedNames'] result[realm_id]['fields'][field['alias']] = r diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py index c8256ee9..4077de1e 100644 --- a/xdmod_data/_utilities.py +++ b/xdmod_data/_utilities.py @@ -12,6 +12,8 @@ def _get_id_from_data_frame( (data_frame.index == value) | (data_frame['label'] == value) ) + if 'deprecated' in data_frame.columns: + mask |= data_frame['deprecated'].apply(lambda x: not pd.isna(x) and x) deprecated_names_mask = pd.Series(False, index=data_frame.index) if 'deprecated_names' in data_frame.columns: deprecated_names_mask = data_frame['deprecated_names'].apply( @@ -25,7 +27,7 @@ def _get_id_from_data_frame( if matches.empty: return None data_id = matches[0] - __warn_if_deprecated_name( + __warn_if_deprecated( value, data_frame, data_type_label, @@ -36,7 +38,7 @@ def _get_id_from_data_frame( return data_id -def __warn_if_deprecated_name( +def __warn_if_deprecated( name, data_frame, data_type_label, @@ -44,17 +46,32 @@ def __warn_if_deprecated_name( deprecated_names_mask, realm, ): - label = data_frame.loc[data_id, 'label'] - alternative = label - if data_id != label: - alternative = f"{data_id}' or '{label}" - realm_text = f" in the '{realm}' realm" if realm is not None else '' - if deprecated_names_mask.any(): + realm_text = '' + warn = False + if realm is not None: + realm_text = f" in the '{realm}' realm" + if ( + 'deprecated' in data_frame.columns + and data_frame.loc[data_id, 'deprecated'] + ): + alternative_text = data_frame.loc[data_id, 'description'].replace( + 'DEPRECATED: ', + '', + ) + warn = True + elif deprecated_names_mask.any(): + label = data_frame.loc[data_id, 'label'] + alternative = label + if data_id != label: + alternative = f"{data_id}' or '{label}" + alternative_text = f"Use '{alternative} instead." + warn = True + if warn: warnings.warn( ( f"The {data_type_label} name '{name}'{realm_text} is" f' deprecated and will be removed in a future version of' - f" XDMoD. Use '{alternative}' instead." + f" XDMoD. {alternative_text}" ), FutureWarning, stacklevel=7, From d1de3eb51d50ac9be59a7df7e554868336099b17 Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Mon, 20 Apr 2026 11:03:56 -0400 Subject: [PATCH 10/11] Update. --- xdmod_data/_utilities.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py index 4077de1e..71c193be 100644 --- a/xdmod_data/_utilities.py +++ b/xdmod_data/_utilities.py @@ -12,8 +12,6 @@ def _get_id_from_data_frame( (data_frame.index == value) | (data_frame['label'] == value) ) - if 'deprecated' in data_frame.columns: - mask |= data_frame['deprecated'].apply(lambda x: not pd.isna(x) and x) deprecated_names_mask = pd.Series(False, index=data_frame.index) if 'deprecated_names' in data_frame.columns: deprecated_names_mask = data_frame['deprecated_names'].apply( @@ -50,21 +48,20 @@ def __warn_if_deprecated( warn = False if realm is not None: realm_text = f" in the '{realm}' realm" - if ( - 'deprecated' in data_frame.columns - and data_frame.loc[data_id, 'deprecated'] - ): - alternative_text = data_frame.loc[data_id, 'description'].replace( - 'DEPRECATED: ', - '', - ) - warn = True - elif deprecated_names_mask.any(): + if 'deprecated' in data_frame.columns: + deprecated = data_frame.loc[data_id, 'deprecated'] + if not pd.isna(deprecated) and deprecated: + alternative_text = data_frame.loc[data_id, 'description'].replace( + 'DEPRECATED: ', + '', + ) + warn = True + if deprecated_names_mask.any(): label = data_frame.loc[data_id, 'label'] alternative = label if data_id != label: alternative = f"{data_id}' or '{label}" - alternative_text = f"Use '{alternative} instead." + alternative_text = f"Use '{alternative}' instead." warn = True if warn: warnings.warn( From 18afdfb876c9be7de0a33131e55de1a3ad770e6e Mon Sep 17 00:00:00 2001 From: Aaron Weeden Date: Mon, 20 Apr 2026 11:33:34 -0400 Subject: [PATCH 11/11] Update. --- xdmod_data/_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdmod_data/_utilities.py b/xdmod_data/_utilities.py index 71c193be..07243269 100644 --- a/xdmod_data/_utilities.py +++ b/xdmod_data/_utilities.py @@ -56,7 +56,7 @@ def __warn_if_deprecated( '', ) warn = True - if deprecated_names_mask.any(): + if not warn and deprecated_names_mask.any(): label = data_frame.loc[data_id, 'label'] alternative = label if data_id != label: