Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/ci/scripts/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
set -exo pipefail

export MIN_PYTHON_VERSION=3.8
export MAX_PYTHON_VERSION=3.13
export MAX_PYTHON_VERSION=3.14
export XDMOD_11_0_IMAGE=tools-ext-01.ccr.xdmod.org/xdmod:x86_64-rockylinux8.9.20231119-v11.0.0-1.0-03

BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
Expand Down
90 changes: 72 additions & 18 deletions tests/integration/test_datawarehouse_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,24 +50,38 @@
'show_progress': False,
'service_provider': 'screw',
}


def __get_key_error_test_match(param):
    """Return the message text the key-error tests match for `param`.

    For the 'metric' and 'dimension' parameters the message also names the
    realm taken from ``VALID_VALUES['realm']``; for any other parameter the
    realm clause is left out.
    """
    realm_clause = (
        f" in the '{VALID_VALUES['realm']}' realm"
        if param in ['metric', 'dimension']
        else ''
    )
    return f"Value for `{param}` not found{realm_clause}: '{INVALID_STR}'"


KEY_ERROR_TEST_VALUES_AND_MATCHES = {
'duration': (INVALID_STR, 'Invalid value for `duration`'),
'realm': (INVALID_STR, r'Realm .* not found'),
'metric': (INVALID_STR, r'Metric .* not found'),
'dimension': (INVALID_STR, r'Dimension .* not found'),
'filter_key': ({INVALID_STR: INVALID_STR}, r'Dimension .* not found'),
'filter_value': (
{VALID_DIMENSION: INVALID_STR},
r'Filter value .* not found',
),
'dataset_type': (INVALID_STR, 'Invalid value for `dataset_type`'),
'aggregation_unit': (INVALID_STR, 'Invalid value for `aggregation_unit`'),
'parameter': (
'filters:key': [
{INVALID_STR: INVALID_STR},
__get_key_error_test_match('dimension'),
],
'parameter': [
INVALID_STR,
'Parameter .* does not have a list of valid values',
),
'field': (INVALID_STR, r'Field .* not found'),
],
}
for param in [
'duration',
'realm',
'metric',
'dimension',
'dataset_type',
'aggregation_unit',
'field',
]:
KEY_ERROR_TEST_VALUES_AND_MATCHES[param] = [
INVALID_STR,
__get_key_error_test_match(param),
]

key_error_test_ids = []
duration_test_ids = []
Expand Down Expand Up @@ -110,10 +124,9 @@
]
value_error_test_methods += [method]
if 'filters' in METHOD_PARAMS[method]:
for param in ('filter_key', 'filter_value'):
key_error_test_ids += [method + ':' + param]
(value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES[param]
key_error_test_params += [(method, {'filters': value}, match)]
key_error_test_ids += [method + ':filters:key']
(value, match) = KEY_ERROR_TEST_VALUES_AND_MATCHES['filters:key']
key_error_test_params += [(method, {'filters': value}, match)]


load_dotenv(Path(os.path.expanduser(TOKEN_PATH)), override=True)
Expand Down Expand Up @@ -489,3 +502,44 @@ def test_get_resources_invalid_service_provider(dw_methods):
# get_resources is not supported in XDMoD < 11.0.2.
if XDMOD_VERSION != 'xdmod-11-0':
assert result == []


def test_deprecated_raw_field(dw_methods):
    """Check that a deprecated raw field name triggers a ``FutureWarning``.

    Runs ``get_raw_data`` on the 'Jobs' realm asking for the field
    'Organization' and expects the warning text to name 'User Institution'
    as the replacement.
    """
    expected_warning = (
        "The field name 'Organization' in the 'Jobs' realm is deprecated"
        ' and will be removed in a future version of XDMoD. Use'
        " 'User Institution' instead."
    )
    request_params = {
        'duration': ['0000-01-01', '0000-01-01'],
        'realm': 'Jobs',
        'fields': ['Organization'],
    }
    with pytest.warns(FutureWarning, match=expected_warning):
        __run_method(dw_methods, 'get_raw_data', request_params)


# Names of the METHOD_PARAMS entries whose parameters include 'filters'.
filters_methods = [
    method for method in METHOD_PARAMS if 'filters' in METHOD_PARAMS[method]
]


@pytest.mark.parametrize('method', filters_methods)
def test_invalid_filter_value(dw_methods, method):
    """Check that an unknown filter value triggers a ``UserWarning``.

    Each parametrized method is run with a filter that maps the valid
    dimension to an invalid value; the warning text must name the dimension,
    the realm, and the offending value.
    """
    expected_warning = (
        f"Filter value not found for the '{VALID_DIMENSION}' dimension in"
        f" the '{VALID_VALUES['realm']}' realm: '{INVALID_STR}'"
    )
    bad_filters = {'filters': {VALID_DIMENSION: INVALID_STR}}
    with pytest.warns(UserWarning, match=expected_warning):
        __run_method(dw_methods, method, bad_filters)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,Local Job Id,Resource,Timezone,System Username (Deidentified),User,Organization,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group
,Local Job Id,Resource,Timezone,System Username (Deidentified),User,User Institution,Quality of Service,Submit Time (Timestamp),Start Time (Timestamp),End Time (Timestamp),Eligible Time (Timestamp),Nodes,Cores,GPUs,Memory Used,Wall Time,Wait Time,Core Time,GPU Time,Exit Code,Exit State,Requested Cores,Requested memory,Requested Wall Time,Queue,Decanal Unit,Department,PI Group
0,6110386,Robertson,UTC,4003ebd9b2de239734970b4cf32cc3d6183d2c6b,"Harrier, Hen",Screwdriver,curry,1483118438,1483118438,1483118443,1483118438,1,1,0,-1,5,0,5,0,0:0,COMPLETED,1,48000Mn,172800,white,Computer and Information Science and Engineering,Computer and Computation Research,Computer and Computation Theory
1000,970339,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483064122,1483064124,1483064151,1483064122,1,8,0,-1,27,2,216,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology
2000,981731,Mortorq,UTC,0b888f86869ffa092ddb8e069c6bb9ec195964ee,Honey-buzzard,Screwdriver,banana-cream,1483102441,1483102444,1483102466,1483102441,1,8,0,-1,22,3,176,0,1:0,FAILED,8,2147486848Mn,108000,black,"Social, Behavioral, and Economic Sciences",Social and Economic Science,Sociology
Expand Down
2 changes: 1 addition & 1 deletion tests/regression/test_datawarehouse_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def test_get_durations(valid_dw):


@pytest.mark.parametrize(
'service_provider', [[None], ['screw']],
'service_provider', [[None], ['screw']], ids=['none', 'not-none'],
)
def test_get_resources(valid_dw, service_provider):
# get_resources is not supported in XDMoD < 11.0.2.
Expand Down
106 changes: 82 additions & 24 deletions xdmod_data/_descriptors.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,84 @@
class _Descriptors:
import pandas as pd
import xdmod_data._utilities as _utilities
import xdmod_data._validator as _validator


class _Descriptor:
def __init__(self, http_requester):
self.__http_requester = http_requester
self.__aggregate = None
self.__raw = None
self.__cached = None

# Return the descriptor entries for `data_type` as a pandas DataFrame.
#
# The descriptor is obtained from `self._request(self.__http_requester)` the
# first time it is needed and kept in `self.__cached` for later calls. When
# `realm` is given it is resolved to a realm ID through `_get_data_id`.
#
# NOTE(review): indentation was lost in this view, so the nesting of the
# statements below is not certain; confirm against the real file.
# NOTE(review): `realm_id` is only assigned when `realm` is not None, yet it
# is read whenever `data_type` is not 'realms'; presumably callers always
# pass a realm in that case. Confirm, or guard with an explicit error.
def _get_data_frame(
self,
data_type,
realm=None,
drop_deprecation_columns=True,
):
# Fetch the descriptor once and reuse it afterwards.
if self.__cached is None:
self.__cached = self._request(self.__http_requester)
if realm is not None:
realm_id = self._get_data_id('realms', realm)
descriptor = self.__cached
if data_type != 'realms':
descriptor = descriptor[realm_id][data_type]
# orient='index' makes each descriptor key a row label; values are stored
# with the pandas 'string' dtype.
data_frame = pd.DataFrame.from_dict(
descriptor,
orient='index',
dtype='string',
)
# For realms, keep only the 'label' column.
if data_type == 'realms':
data_frame = data_frame['label'].to_frame()
data_frame = data_frame.rename_axis('id')
if drop_deprecation_columns:
# errors='ignore' lets the drop succeed when these columns are absent.
data_frame = data_frame.drop(
columns=['deprecated', 'deprecated_names'],
errors='ignore',
)
data_frame.index = data_frame.index.astype('string')
data_frame.columns = data_frame.columns.astype('string')
return data_frame

def _get_data_id(self, data_type, value, realm=None):
    """Resolve `value` to the ID of an entry of `data_type`.

    `value` is first checked with ``_validator._assert_str`` and then looked
    up with ``_utilities._get_id_from_data_frame`` in the data frame for
    `data_type` (and `realm`, if given).

    Raises:
        KeyError: if the lookup helper returns None for `value`.
    """
    # The parameter name used in validation and in the error message is the
    # data type without its trailing 's', e.g. 'metrics' -> 'metric'.
    param_name = data_type.rstrip('s')
    _validator._assert_str(param_name, value)
    # The lookup receives the frame with the deprecation columns kept.
    lookup_frame = self._get_data_frame(
        data_type,
        realm,
        drop_deprecation_columns=False,
    )
    data_id = _utilities._get_id_from_data_frame(
        value,
        lookup_frame,
        param_name,
        realm,
    )
    if data_id is not None:
        return data_id
    if realm is None:
        realm_text = ''
    else:
        realm_text = f" in the '{realm}' realm"
    raise KeyError(
        f"Value for `{param_name}` not found{realm_text}: '{value}'",
    ) from None

def _get_aggregate(self):
if self.__aggregate is None:
self.__aggregate = self.__request_aggregate()
return self.__aggregate
def _get_label_from_id(self, data_type, data_id, realm=None):
    """Return the 'label' of the `data_type` entry whose ID is `data_id`.

    The dimension ID 'none' yields None without consulting the descriptor.
    """
    is_none_dimension = data_type == 'dimensions' and data_id == 'none'
    if is_none_dimension:
        return None
    return self._get_data_frame(data_type, realm).loc[data_id, 'label']

def _get_raw(self):
if self.__raw is None:
self.__raw = self.__request_raw()
return self.__raw

def __request_aggregate(self):
response = self.__http_requester._request_json(
class _AggregateDescriptor(_Descriptor):
def _request(self, http_requester):
response = http_requester._request_json(
'/controllers/metric_explorer.php',
{'operation': 'get_dw_descripter'},
)
if response['totalCount'] != 1: # pragma: no cover
raise RuntimeError(
'Descriptor received with unexpected structure.',
)
return self.__deserialize_aggregate(response['data'][0]['realms'])

def __request_raw(self):
response = self.__http_requester._request_json(
'/rest/v1/warehouse/export/realms',
)
return self.__deserialize_raw(response['data'])

def __deserialize_aggregate(self, serialized_descriptor):
serialized_descriptor = response['data'][0]['realms']
result = {}
for realm in serialized_descriptor:
result[realm] = {'label': serialized_descriptor[realm]['category']}
Expand All @@ -45,16 +92,27 @@ def __deserialize_aggregate(self, serialized_descriptor):
}
return result

def __deserialize_raw(self, serialized_descriptor):

class _RawDescriptor(_Descriptor):
def _request(self, http_requester):
response = http_requester._request_json(
'/rest/v1/warehouse/export/realms',
)
serialized_descriptor = response['data']
result = {}
for realm in serialized_descriptor:
realm_id = realm['id']
result[realm_id] = {'label': realm['name']}
result[realm_id]['fields'] = {}
fields = realm['fields']
for field in fields:
result[realm_id]['fields'][field['alias']] = {
r = {
'label': field['display'],
'description': field['documentation'],
}
if 'deprecated' in field:
r['deprecated'] = field['deprecated']
if 'deprecatedNames' in field:
r['deprecated_names'] = field['deprecatedNames']
result[realm_id]['fields'][field['alias']] = r
return result
26 changes: 11 additions & 15 deletions xdmod_data/_response_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,25 @@
import re


def _process_get_data_response(dw, params, response):
params['metric'] = dw._get_metric_label(
params['realm'],
def _process_get_data_response(aggregate_descriptor, params, response):
params['metric'] = aggregate_descriptor._get_label_from_id(
'metrics',
params['metric'],
)
params['dimension'] = dw._get_dimension_label(
params['realm'],
)
params['dimension'] = aggregate_descriptor._get_label_from_id(
'dimensions',
params['dimension'],
params['realm'],
)
csv_data = csv.reader(response.splitlines())
if params['dataset_type'] == 'timeseries':
return __parse_timeseries_csv_data(dw, params, csv_data)
return __parse_timeseries_csv_data(params, csv_data)
else:
return __parse_aggregate_csv_data(params, csv_data)


def __parse_timeseries_csv_data(dw, params, csv_data):
def __parse_timeseries_csv_data(params, csv_data):
time_values = []
data = []
for line_num, line in enumerate(csv_data):
Expand All @@ -32,7 +34,6 @@ def __parse_timeseries_csv_data(dw, params, csv_data):
time_values.append(__parse_timeseries_date_string(line[0]))
data.append(np.asarray(line[1:]))
return __get_timeseries_data_frame(
dw,
params,
data,
time_values,
Expand Down Expand Up @@ -80,7 +81,6 @@ def __parse_timeseries_date_string(date_string):


def __get_timeseries_data_frame(
dw,
params,
data,
time_values,
Expand All @@ -93,11 +93,7 @@ def __get_timeseries_data_frame(
dtype='datetime64[ns]',
name='Time',
),
columns=__get_timeseries_data_frame_columns(
dw,
params,
dimension_values,
),
columns=__get_timeseries_data_frame_columns(params, dimension_values),
dtype='Float64',
).fillna(value=np.nan)

Expand Down Expand Up @@ -141,7 +137,7 @@ def __parse_quarter_date_string(date_string):
return (date_string, format_)


def __get_timeseries_data_frame_columns(dw, params, dimension_values):
def __get_timeseries_data_frame_columns(params, dimension_values):
if params['dimension'] is None:
columns = pd.Series(
data=params['metric'],
Expand Down
Loading