diff --git a/Changelog.md b/Changelog.md index a81bd44..6b1989a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,6 +1,9 @@ # Change log ---- +# 0.3.10 +- Updated tests to depend on test-aide rather than historic Tubular helpers package + # 0.3.9 - Stripped of internal files and updated md documents diff --git a/docs/source/conf.py b/docs/source/conf.py index a92328e..279834a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,17 +12,18 @@ import os import sys + sys.path.insert(0, os.path.abspath("../../")) # -- Project information ----------------------------------------------------- -project = 'input_checker' -copyright = '2021, LV GI Data Science Team' -author = 'LV GI Data Science Team' +project = "input_checker" +copyright = "2021, LV GI Data Science Team" +author = "LV GI Data Science Team" # The full version, including alpha/beta/rc tags -release = '0.3.9' +release = "0.3.9" # -- General configuration --------------------------------------------------- @@ -31,14 +32,14 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx_rtd_theme' + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_rtd_theme", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -51,9 +52,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ["_static"] diff --git a/input_checker/_version.py b/input_checker/_version.py index 771bc6e..aa4cd15 100644 --- a/input_checker/_version.py +++ b/input_checker/_version.py @@ -1 +1 @@ -__version__ = "0.3.9" +__version__ = "0.3.10" diff --git a/requirements-dev.txt b/requirements-dev.txt index 58e67f2..cdcdefd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +test-aide>=0.1.0 tubular>=0.2.10 pytest>=5.4.1 pytest-mock>=3.0.0 diff --git a/tests/conftest.py b/tests/conftest.py index e69de29..afce9eb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -0,0 +1,20 @@ +import numpy as np +import pandas as pd +import pytest + + +@pytest.fixture +def df(): + """Create simple DataFrame to use in other tests""" + + data = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5, 6, np.NaN], + "b": ["a", "b", "c", "d", "e", "f", np.NaN], + "c": ["a", "b", "c", "d", "e", "f", np.NaN], + } + ) + + data["c"] = data["c"].astype("category") + + return data diff --git a/tests/test_InputChecker.py b/tests/test_InputChecker.py index fcc5d28..340d299 100644 --- a/tests/test_InputChecker.py +++ b/tests/test_InputChecker.py @@ -3,8 +3,7 @@ import pytest import re import tubular -import tubular.testing.helpers as h -import tubular.testing.test_data as data_generators_p +import test_aide as ta import input_checker from input_checker._version import __version__ @@ -20,7 +19,7 @@ def test_super_init_called(self, mocker): expected_call_args = {0: {"args": (), "kwargs": {"columns": ["a", "b"]}}} - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "__init__", expected_call_args ): InputChecker(columns=["a", "b"]) @@ -30,12 +29,12 @@ def test_inheritance(self): x = InputChecker() - h.assert_inheritance(x, tubular.base.BaseTransformer) + ta.classes.assert_inheritance(x, tubular.base.BaseTransformer) def test_arguments(self): """Test that InputChecker init has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker.__init__, expected_arguments=[ "self", @@ -53,13 +52,11 @@ def test_version_attribute(self): x = InputChecker(columns=["a"]) - h.assert_equal_dispatch( - expected=__version__, - actual=x.version_, - msg="__version__ attribute", + ta.equality.assert_equal_dispatch( + expected=__version__, actual=x.version_, msg="__version__ attribute", ) - def test_columns_attributes_generated(self): + def test_columns_attributes_generated(self, df): """Test all columns attributes are saved with InputChecker init""" x = InputChecker( @@ -70,8 +67,6 @@ def test_columns_attributes_generated(self): skip_infer_columns=["c"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -291,18 +286,16 @@ def test_check_is_listed_in_columns_called(self, mocker): class TestConsolidateInputs(object): def test_arguments(self): """Test that _consolidate_inputs has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._consolidate_inputs, expected_arguments=["self", "X"], expected_default_values=None, ) - def test_infer_datetime_columns(self): + def test_infer_datetime_columns(self, df): """Test that _consolidate_inputs infers the correct datetime columns""" x = InputChecker(datetime_columns="infer") - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -333,13 +326,11 @@ def test_infer_datetime_columns(self): "e", ], "infer datetime not finding correct columns" - def test_infer_datetime_dict(self): + def test_infer_datetime_dict(self, df): """Test that _consolidate_inputs infers the correct datetime dict""" x = InputChecker(datetime_columns="infer") - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -362,12 +353,10 @@ def test_infer_datetime_dict(self): x.datetime_dict["d"]["minimum"] is True ), "infer numerical not specifying maximum value check as true" - def test_infer_categorical_columns(self): + def test_infer_categorical_columns(self, df): """Test that _consolidate_inputs infers the correct categorical columns""" x = InputChecker(categorical_columns="infer") - df = data_generators_p.create_df_2() - df["d"] = [True, True, False, True, True, False, np.nan] df["d"] = df["d"].astype("bool") @@ -380,26 +369,22 @@ def test_infer_categorical_columns(self): "d", ], "infer categorical not finding correct columns" - def test_infer_numerical_columns(self): + def test_infer_numerical_columns(self, df): """Test that _consolidate_inputs infers the correct numerical columns""" x = InputChecker(numerical_columns="infer") - df = data_generators_p.create_df_2() - x.fit(df) assert x.numerical_columns == [ "a" ], "infer numerical not finding correct columns" - def test_infer_numerical_skips_infer_columns(self): + def test_infer_numerical_skips_infer_columns(self, df): """Test that _consolidate_inputs skips right columns when inferring numerical""" x = InputChecker(numerical_columns="infer", skip_infer_columns=["a"]) - df = data_generators_p.create_df_2() - df["d"] = df["a"] x.fit(df) @@ -408,26 +393,22 @@ def test_infer_numerical_skips_infer_columns(self): "d" ], "infer numerical not finding correct columns when skipping infer columns" - def test_infer_categorical_skips_infer_columns(self): + def test_infer_categorical_skips_infer_columns(self, df): """Test that _consolidate_inputs skips right columns when inferring categorical""" x = InputChecker(categorical_columns="infer", skip_infer_columns=["b"]) - df = data_generators_p.create_df_2() - x.fit(df) assert x.categorical_columns == [ "c" ], "infer categorical not finding correct columns when skipping infer columns" - def test_infer_datetime_skips_infer_columns(self): + def test_infer_datetime_skips_infer_columns(self, df): """Test that _consolidate_inputs skips right columns when inferring datetime""" x = InputChecker(datetime_columns="infer", skip_infer_columns=["d"]) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -448,13 +429,11 @@ def test_infer_datetime_skips_infer_columns(self): "a" ], "infer datetime not finding correct columns when skipping infer columns" - def test_infer_numerical_dict(self): + def test_infer_numerical_dict(self, df): """Test that _consolidate_inputs infers the correct numerical dict""" x = InputChecker(numerical_columns="infer") - df = data_generators_p.create_df_2() - x.fit(df) assert ( @@ -465,13 +444,11 @@ def test_infer_numerical_dict(self): x.numerical_dict["a"]["minimum"] is True ), "infer numerical not specifying minimum value check as true" - def test_datetime_type(self): + def test_datetime_type(self, df): """Test that datetime columns is a list after calling _consolidate_inputs""" x = InputChecker(datetime_columns="infer") - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -490,26 +467,22 @@ def test_datetime_type(self): type(x.datetime_columns) is list ), f"incorrect datetime_columns type returned from _consolidate_inputs - expected: list but got: {type(x.datetime_columns)} " - def test_categorical_type(self): + def test_categorical_type(self, df): """Test that categorical columns is a list after calling _consolidate_inputs""" x = InputChecker(categorical_columns="infer") - df = data_generators_p.create_df_2() - x.fit(df) assert ( type(x.categorical_columns) is list ), f"incorrect categorical_columns type returned from _consolidate_inputs - expected: list but got: {type(x.categorical_columns)} " - def test_numerical_type(self): + def test_numerical_type(self, df): """Test that numerical columns and dict are a list and dict after calling _consolidate_inputs""" x = InputChecker(numerical_columns="infer") - df = data_generators_p.create_df_2() - x.fit(df) assert ( @@ -520,7 +493,7 @@ def test_numerical_type(self): type(x.numerical_dict) is dict ), f"incorrect numerical_dict type returned from _consolidate_inputs - expected: dict but got: {type(x.numerical_dict)} " - def test_check_is_subset_called(self, mocker): + def test_check_is_subset_called(self, df, mocker): """Test all check _is_subset is called by the _consolidate_inputs method.""" x = InputChecker( @@ -531,8 +504,6 @@ def test_check_is_subset_called(self, mocker): skip_infer_columns=["b"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -601,7 +572,7 @@ class TestFitTypeChecker(object): def test_arguments(self): """Test that InputChecker _fit_type_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._fit_type_checker, expected_arguments=["self", "X"] ) @@ -614,11 +585,9 @@ def test_no_column_classes_before_fit(self): hasattr(x, "column_classes") is False ), "column_classes attribute present before fit" - def test_column_classes_after_fit(self): + def test_column_classes_after_fit(self, df): """Test column_classes is present after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -627,11 +596,9 @@ def test_column_classes_after_fit(self): x, "column_classes" ), "column_classes attribute not present after fit" - def test_correct_columns_classes(self): + def test_correct_columns_classes(self, df): """Test fit type checker saves types for correct columns after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(columns=["a"]) x.fit(df) @@ -640,11 +607,9 @@ def test_correct_columns_classes(self): "a" ], f"incorrect values returned from _fit_value_checker - expected: ['a'] but got: {list(x.column_classes.keys())}" - def test_correct_classes_identified(self): + def test_correct_classes_identified(self, df): """Test fit type checker identifies correct classes is present after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker() df["d"] = pd.to_datetime( @@ -684,7 +649,7 @@ class TestFitNullChecker(object): def test_arguments(self): """Test that InputChecker _fit_null_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._fit_null_checker, expected_arguments=["self", "X"] ) @@ -695,22 +660,18 @@ def test_no_expected_values_before_fit(self): assert hasattr(x, "null_map") is False, "null_map attribute present before fit" - def test_expected_values_after_fit(self): + def test_expected_values_after_fit(self, df): """Test null_map is present after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) assert hasattr(x, "null_map"), "null_map attribute not present after fit" - def test_correct_columns_nulls(self): + def test_correct_columns_nulls(self, df): """Test fit nulls checker saves map for correct columns after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(columns=["a"]) x.fit(df) @@ -719,11 +680,9 @@ def test_correct_columns_nulls(self): "a" ], f"incorrect values returned from _fit_null_checker - expected: ['a'] but got: {list(x.null_map.keys())}" - def test_correct_classes_identified(self): + def test_correct_classes_identified(self, df): """Test fit null checker identifies correct columns with nulls after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker() df["b"] = df["b"].fillna("a") @@ -749,7 +708,7 @@ class TestFitValueChecker(object): def test_arguments(self): """Test that InputChecker _fit_value_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._fit_value_checker, expected_arguments=["self", "X"] ) @@ -762,11 +721,9 @@ def test_no_expected_values_before_fit(self): hasattr(x, "expected_values") is False ), "expected_values attribute present before fit" - def test_expected_values_after_fit(self): + def test_expected_values_after_fit(self, df): """Test expected_values is present after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(categorical_columns=["b", "c"]) x.fit(df) @@ -775,11 +732,9 @@ def test_expected_values_after_fit(self): x, "expected_values" ), "expected_values attribute not present after fit" - def test_correct_columns_map(self): + def test_correct_columns_map(self, df): """Test fit value checker saves levels for correct columns after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(categorical_columns=["b", "c"]) x.fit(df) @@ -789,11 +744,9 @@ def test_correct_columns_map(self): "c", ], f"incorrect values returned from _fit_value_checker - expected: ['b', 'c'] but got: {list(x.expected_values.keys())}" - def test_correct_values_identified(self): + def test_correct_values_identified(self, df): """Test fit value checker identifies corrcet levels after fit called""" - df = data_generators_p.create_df_2() - df["d"] = [True, True, False, True, True, False, np.nan] df["d"] = df["d"].astype("bool") @@ -834,7 +787,7 @@ class TestFitNumericalChecker(object): def test_arguments(self): """Test that InputChecker _fit_numerical_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._fit_numerical_checker, expected_arguments=["self", "X"] ) @@ -847,11 +800,9 @@ def test_no_expected_values_before_fit(self): hasattr(x, "numerical_values") is False ), "numerical_values attribute present before fit" - def test_expected_values_after_fit(self): + def test_expected_values_after_fit(self, df): """Test numerical_values is present after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -860,11 +811,9 @@ def test_expected_values_after_fit(self): x, "numerical_values" ), "numerical_values attribute not present after fit" - def test_correct_columns_num_values(self): + def test_correct_columns_num_values(self, df): """Test fit numerical checker saves values for correct columns after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -873,11 +822,9 @@ def test_correct_columns_num_values(self): "a" ], f"incorrect values returned from numerical_values - expected: ['a'] but got: {list(x.numerical_values.keys())}" - def test_correct_numerical_values_identified(self): + def test_correct_numerical_values_identified(self, df): """Test fit numerical checker identifies correct range values after fit called""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -890,11 +837,9 @@ def test_correct_numerical_values_identified(self): x.numerical_values["a"]["minimum"] == 1 ), f"incorrect values returned from _fit_numerical_checker - expected: 0 but got: {x.numerical_values['a']['minimum']}" - def test_correct_numerical_values_identified_dict(self): + def test_correct_numerical_values_identified_dict(self, df): """Test fit numerical checker identifies correct range values after fit called when inputting a dictionary""" - df = data_generators_p.create_df_2() - numerical_dict = {} numerical_dict["a"] = {} numerical_dict["a"]["maximum"] = True @@ -919,7 +864,7 @@ class TestFitDatetimeChecker(object): def test_arguments(self): """Test that InputChecker _fit_value_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._fit_datetime_checker, expected_arguments=["self", "X"] ) @@ -932,11 +877,9 @@ def test_no_datetime_values_before_fit(self): hasattr(x, "datetime_values") is False ), "datetime_values attribute present before fit" - def test_datetime_values_after_fit(self): + def test_datetime_values_after_fit(self, df): """Test datetime_values is present after fit called""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -968,11 +911,9 @@ def test_datetime_values_after_fit(self): x, "datetime_values" ), "datetime_values attribute not present after fit" - def test_correct_columns_map(self): + def test_correct_columns_map(self, df): """Test fit datetime checker saves minimum dates for correct columns after fit called""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1005,11 +946,9 @@ def test_correct_columns_map(self): "e", ], f"incorrect values returned from _fit_datetime_checker - expected: ['d', 'e'] but got: {list(x.datetime_values.keys())} " - def test_correct_datetime_values_identified(self): + def test_correct_datetime_values_identified(self, df): """Test fit datetime checker identifies correct minimum bound after fit called""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1039,11 +978,9 @@ def test_correct_datetime_values_identified(self): actual_max_d is None ), f"incorrect values returned from _fit_datetime_checker - expected: None, but got: {actual_max_d}" - def test_correct_datetime_values_identified_dict(self): + def test_correct_datetime_values_identified_dict(self, df): """Test fit datetime checker identifies correct range values after fit called when inputting a dictionary""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1083,33 +1020,27 @@ class TestFit(object): def test_arguments(self): """Test that InputChecker fit has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker.fit, expected_arguments=["self", "X", "y"], expected_default_values=(None,), ) - def test_super_fit_called(self, mocker): + def test_super_fit_called(self, df, mocker): """Test that BaseTransformer fit called.""" - expected_call_args = { - 0: {"args": (data_generators_p.create_df_2(), None), "kwargs": {}} - } - - df = data_generators_p.create_df_2() + expected_call_args = {0: {"args": (df, None), "kwargs": {}}} x = InputChecker(columns=["a"]) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "fit", expected_call_args ): x.fit(df) - def test_all_columns_selected(self): + def test_all_columns_selected(self, df): """Test fit selects all columns when columns parameter set to None""" - df = data_generators_p.create_df_2() - x = InputChecker(columns=None) assert ( @@ -1124,26 +1055,22 @@ def test_all_columns_selected(self): "c", ], f"incorrect columns identified when columns parameter set to None - expected: ['a', 'b', 'c'] but got: {x.columns}" - def test_fit_returns_self(self): + def test_fit_returns_self(self, df): """Test fit returns self?""" - df = data_generators_p.create_df_2() - x = InputChecker() x_fitted = x.fit(df) assert x_fitted is x, "Returned value from InputChecker.fit not as expected." - def test_no_optional_calls_fit(self): + def test_no_optional_calls_fit(self, df): """Test numerical_values and expected_values is not present after fit if parameters set to None""" x = InputChecker( numerical_columns=None, categorical_columns=None, datetime_columns=None ) - df = data_generators_p.create_df_2() - x.fit(df) assert ( @@ -1158,15 +1085,13 @@ def test_no_optional_calls_fit(self): hasattr(x, "datetime_values") is False ), "datetime_values attribute present with datetime_columns set to None" - def test_compulsory_checks_generated_with_no_optional_calls_fit(self): + def test_compulsory_checks_generated_with_no_optional_calls_fit(self, df): """Test null_map and column_classes are present after fit when optional parameters set to None""" x = InputChecker( numerical_columns=None, categorical_columns=None, datetime_columns=None ) - df = data_generators_p.create_df_2() - x.fit(df) assert ( @@ -1177,7 +1102,7 @@ def test_compulsory_checks_generated_with_no_optional_calls_fit(self): hasattr(x, "column_classes") is True ), "column_classes attribute not present when optional checks set to None" - def test_all_checks_generated(self): + def test_all_checks_generated(self, df): """Test all checks are generated when all optional parameters set""" x = InputChecker( @@ -1187,8 +1112,6 @@ def test_all_checks_generated(self): datetime_columns=["d"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1223,7 +1146,7 @@ def test_all_checks_generated(self): hasattr(x, "column_classes") is True ), "column_classes attribute not present after fit" - def test_check_df_is_empty_called(self, mocker): + def test_check_df_is_empty_called(self, df, mocker): """Test check is df empty is called by the fit method.""" x = InputChecker( @@ -1232,8 +1155,6 @@ def test_check_df_is_empty_called(self, mocker): categorical_columns=["b", "c"], ) - df = data_generators_p.create_df_2() - spy = mocker.spy(input_checker.checker.InputChecker, "_df_is_empty") x.fit(df) @@ -1258,33 +1179,29 @@ class TestTransformTypeChecker(object): def test_arguments(self): """Test that InputChecker _transform_type_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._transform_type_checker, expected_arguments=["self", "X", "batch_mode"], expected_default_values=(False,), ) - def test_check_fitted_called(self, mocker): + def test_check_fitted_called(self, df, mocker): """Test that transform calls BaseTransformer.check_is_fitted.""" expected_call_args = {0: {"args": (["column_classes"],), "kwargs": {}}} x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args ): x._transform_type_checker(df) - def test_transform_returns_failed_checks_dict(self): + def test_transform_returns_failed_checks_dict(self, df): """Test _transform_type_checker returns results dictionary""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1295,11 +1212,9 @@ def test_transform_returns_failed_checks_dict(self): type_checker_failed_checks, dict ), f"incorrect type results type identified - expected: dict but got: {type(type_checker_failed_checks)}" - def test_transform_passes(self): + def test_transform_passes(self, df): """Test _transform_type_checker passes all the checks on the training dataframe""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1310,11 +1225,9 @@ def test_transform_passes(self): type_checker_failed_checks == {} ), f"Type checker found failed tests - {list(type_checker_failed_checks.keys())}" - def test_transform_passes_column_all_nulls(self): + def test_transform_passes_column_all_nulls(self, df): """Test _transform_type_checker passes all the checks on the training dataframe when a column contains only nulls""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1327,11 +1240,9 @@ def test_transform_passes_column_all_nulls(self): type_checker_failed_checks == {} ), f"Type checker found failed tests - {list(type_checker_failed_checks.keys())}" - def test_transform_captures_failed_test(self): + def test_transform_captures_failed_test(self, df): """Test _transform_type_checker captures a failed check""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1350,11 +1261,9 @@ def test_transform_captures_failed_test(self): type_checker_failed_checks["a"]["expected"] == exp_type ), f"incorrect values saved to type_checker_failed_checks expected types - expected: [{exp_type}] but got: {type_checker_failed_checks['a']['types']}" - def test_transform_passes_batch_mode(self): + def test_transform_passes_batch_mode(self, df): """Test _transform_type_checker passes all the checks on the training dataframe""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1365,11 +1274,9 @@ def test_transform_passes_batch_mode(self): type_checker_failed_checks == {} ), f"Type checker found failed tests - {list(type_checker_failed_checks.keys())}" - def test_transform_captures_failed_test_batch_mode(self): + def test_transform_captures_failed_test_batch_mode(self, df): """Test _transform_type_checker handles mixed types""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1430,31 +1337,27 @@ class TestTransformNullChecker(object): def test_arguments(self): """Test that InputChecker _transform_null_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._transform_null_checker, expected_arguments=["self", "X"] ) - def test_check_fitted_called(self, mocker): + def test_check_fitted_called(self, df, mocker): """Test that transform calls BaseTransformer.check_is_fitted.""" expected_call_args = {0: {"args": (["null_map"],), "kwargs": {}}} x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args ): x._transform_null_checker(df) - def test_transform_returns_failed_checks_dict(self): + def test_transform_returns_failed_checks_dict(self, df): """Test _transform_null_checker returns results dictionary""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -1465,11 +1368,9 @@ def test_transform_returns_failed_checks_dict(self): null_checker_failed_checks, dict ), f"incorrect null results type identified - expected: dict but got: {type(null_checker_failed_checks)}" - def test_transform_passes(self): + def test_transform_passes(self, df): """Test _transform_null_checker passes all the checks on the training dataframe""" - df = data_generators_p.create_df_2() - df["b"] = df["b"].fillna("a") x = InputChecker() @@ -1482,11 +1383,9 @@ def test_transform_passes(self): null_checker_failed_checks == {} ), f"Null checker found failed tests - {list(null_checker_failed_checks.keys())}" - def test_transform_captures_failed_test(self): + def test_transform_captures_failed_test(self, df): """Test _transform_null_checker captures a failed check""" - df = data_generators_p.create_df_2() - df["b"] = df["b"].fillna("a") x = InputChecker() @@ -1508,36 +1407,29 @@ class TestTransformNumericalChecker(object): def test_arguments(self): """Test that InputChecker _transform_numerical_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._transform_numerical_checker, expected_arguments=["self", "X", "type_fails", "batch_mode"], - expected_default_values=( - {}, - False, - ), + expected_default_values=({}, False,), ) - def test_check_fitted_called(self, mocker): + def test_check_fitted_called(self, df, mocker): """Test that transform calls BaseTransformer.check_is_fitted.""" expected_call_args = {0: {"args": (["numerical_values"],), "kwargs": {}}} x = InputChecker(numerical_columns=["a"]) - df = data_generators_p.create_df_2() - x.fit(df) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args ): x._transform_numerical_checker(df, {}) - def test_transform_returns_failed_checks_dict(self): + def test_transform_returns_failed_checks_dict(self, df): """Test _transform_numerical_checker returns results dictionary""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -1548,11 +1440,9 @@ def test_transform_returns_failed_checks_dict(self): numerical_checker_failed_checks, dict ), f"incorrect numerical results type identified - expected: dict but got: {type(numerical_checker_failed_checks)}" - def test_transform_passes(self): + def test_transform_passes(self, df): """Test _transform_numerical_checker passes all the numerical checks on the training dataframe""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -1563,11 +1453,9 @@ def test_transform_passes(self): numerical_checker_failed_checks == {} ), f"Numerical checker found failed tests - {list(numerical_checker_failed_checks.keys())}" - def test_transform_captures_failed_test(self): + def test_transform_captures_failed_test(self, df): """Test _transform_numerical_checker captures a failed check""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -1588,11 +1476,9 @@ def test_transform_captures_failed_test(self): numerical_checker_failed_checks["a"]["minimum"] == expected_min ), f"incorrect values saved to numerical_checker_failed_checks - expected: {expected_min} but got: {numerical_checker_failed_checks['a']['minimum']}" - def test_transform_captures_failed_test_only_maximum(self): + def test_transform_captures_failed_test_only_maximum(self, df): """Test _transform_numerical_checker captures a failed check when the check includes a maximum value but no minimum value""" - df = data_generators_p.create_df_2() - numerical_dict = {} numerical_dict["a"] = {} numerical_dict["a"]["maximum"] = True @@ -1617,11 +1503,9 @@ def test_transform_captures_failed_test_only_maximum(self): "minimum" not in numerical_checker_failed_checks["a"] ), "No minimum value results expected given input the numerical dict" - def test_transform_captures_failed_test_only_minimum(self): + def test_transform_captures_failed_test_only_minimum(self, df): """Test _transform_numerical_checker captures a failed check when the check includes a minimum value but no maximum value""" - df = data_generators_p.create_df_2() - numerical_dict = {} numerical_dict["a"] = {} numerical_dict["a"]["maximum"] = False @@ -1646,12 +1530,10 @@ def test_transform_captures_failed_test_only_minimum(self): "maximum" not in numerical_checker_failed_checks["a"] ), "No maximum value results expected given input the numerical dict" - def test_transform_skips_failed_type_checks_batch_mode(self): + def test_transform_skips_failed_type_checks_batch_mode(self, df): """Test _transform_numerical_checker skips checks for rows which aren't numerical when operating in batch mode""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -1670,18 +1552,16 @@ def test_transform_skips_failed_type_checks_batch_mode(self): df, type_fails_dict, batch_mode=True ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( actual=numerical_checker_failed_checks, expected=expected_output, msg="rows failing type check have not been removed by _transform_numerical_checker", ) - def test_transform_skips_failed_type_checks(self): + def test_transform_skips_failed_type_checks(self, df): """Test _transform_numerical_checker skips checks for columns which aren't numerical when not operating in batch mode""" - df = data_generators_p.create_df_2() - x = InputChecker(numerical_columns=["a"]) x.fit(df) @@ -1698,7 +1578,7 @@ def test_transform_skips_failed_type_checks(self): df_test, type_fails_dict, batch_mode=False ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( actual=numerical_checker_failed_checks, expected={}, msg="rows failing type check have not been removed by _transform_numerical_checker", @@ -1717,7 +1597,7 @@ def test_transform_skips_failed_type_checks(self): df_test2, type_fails_dict2, batch_mode=False ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( actual=numerical_checker_failed_checks2, expected={"a": {"max idxs": [2], "maximum": {2: 222}}}, msg="rows failing type check have not been removed by _transform_numerical_checker", @@ -1730,31 +1610,27 @@ class TestTransformValueChecker(object): def test_arguments(self): """Test that InputChecker _transform_value_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._transform_value_checker, expected_arguments=["self", "X"] ) - def test_check_fitted_called(self, mocker): + def test_check_fitted_called(self, df, mocker): """Test that transform calls BaseTransformer.check_is_fitted.""" expected_call_args = {0: {"args": (["expected_values"],), "kwargs": {}}} x = InputChecker(categorical_columns=["b", "c"]) - df = data_generators_p.create_df_2() - x.fit(df) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args ): x._transform_value_checker(df) - def test_transform_returns_failed_checks_dict(self): + def test_transform_returns_failed_checks_dict(self, df): """Test _transform_value_checker returns results dictionary""" - df = data_generators_p.create_df_2() - x = InputChecker(categorical_columns=["b", "c"]) x.fit(df) @@ -1765,11 +1641,9 @@ def test_transform_returns_failed_checks_dict(self): value_checker_failed_checks, dict ), f"incorrect numerical results type identified - expected: dict but got: {type(value_checker_failed_checks)}" - def test_transform_passes(self): + def test_transform_passes(self, df): """Test _transform_value_checker passes all the categorical checks on the training dataframe""" - df = data_generators_p.create_df_2() - x = InputChecker(categorical_columns=["b", "c"]) x.fit(df) @@ -1780,11 +1654,9 @@ def test_transform_passes(self): value_checker_failed_checks == {} ), f"Categorical checker found failed tests - {list(value_checker_failed_checks.keys())}" - def test_transform_captures_failed_test(self): + def test_transform_captures_failed_test(self, df): """Test _transform_value_checker captures a failed check""" - df = data_generators_p.create_df_2() - x = InputChecker(categorical_columns=["b", "c"]) x.fit(df) @@ -1808,23 +1680,19 @@ class TestTransformDatetimeChecker(object): def test_arguments(self): """Test that InputChecker _transform_datetime_checker has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._transform_datetime_checker, expected_arguments=["self", "X", "type_fails", "batch_mode"], - expected_default_values=( - {}, - False, - ), + expected_default_values=({}, False,), ) - def test_check_fitted_called(self, mocker): + def test_check_fitted_called(self, df, mocker): """Test that transform calls BaseTransformer.check_is_fitted.""" expected_call_args = {0: {"args": (["datetime_values"],), "kwargs": {}}} x = InputChecker(datetime_columns=["d"]) - df = data_generators_p.create_df_2() df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1839,16 +1707,14 @@ def test_check_fitted_called(self, mocker): x.fit(df) - with h.assert_function_call( + with ta.functions.assert_function_call( mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args ): x._transform_datetime_checker(df, {}) - def test_transform_returns_failed_checks_dict(self): + def test_transform_returns_failed_checks_dict(self, df): """Test _transform_datetime_checker returns results dictionary""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1871,11 +1737,9 @@ def test_transform_returns_failed_checks_dict(self): datetime_checker_failed_checks, dict ), f"incorrect datetime results type identified - expected: dict but got: {type(datetime_checker_failed_checks)}" - def test_transform_passes(self): + def test_transform_passes(self, df): """Test _transform_datetime_checker passes all the numerical checks on the training dataframe""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1898,11 +1762,9 @@ def test_transform_passes(self): datetime_checker_failed_checks == {} ), f"Datetime checker found failed tests - {list(datetime_checker_failed_checks.keys())}" - def test_transform_captures_failed_test(self): + def test_transform_captures_failed_test(self, df): """Test _transform_datetime_checker captures a failed check""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1939,12 +1801,10 @@ def test_transform_captures_failed_test(self): f"expected: {outliers_2} but got: {results[1]} " ) - def test_transform_captures_failed_test_both_minimum_and_maximum(self): + def test_transform_captures_failed_test_both_minimum_and_maximum(self, df): """Test _transform_datetime_checker captures a failed check when the check includes a maximum value and a minimum value""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -1988,12 +1848,10 @@ def test_transform_captures_failed_test_both_minimum_and_maximum(self): f"{datetime_checker_failed_checks['d']['minimum']} " ) - def test_transform_skips_failed_type_checks_batch_mode(self): + def test_transform_skips_failed_type_checks_batch_mode(self, df): """Test _transform_datetime_checker skips checks for rows which aren't datetime type when operating in batch mode""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2026,7 +1884,7 @@ def test_transform_skips_failed_type_checks_batch_mode(self): df, type_fails_dict, batch_mode=True ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( actual=datetime_checker_failed_checks, expected={ "d": { @@ -2037,12 +1895,10 @@ def test_transform_skips_failed_type_checks_batch_mode(self): msg="rows failing type check have not been removed by _transform_datetime_checker", ) - def test_transform_skips_failed_type_checks(self): + def test_transform_skips_failed_type_checks(self, df): """Test _transform_datetime_checker skips checks for columns which aren't datetime when not operating in batch mode""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2069,7 +1925,7 @@ def test_transform_skips_failed_type_checks(self): df_test, type_fails_dict, batch_mode=False ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( actual=datetime_checker_failed_checks, expected={}, msg="rows failing type check have not been removed by _transform_datetime_checker", @@ -2081,13 +1937,13 @@ class TestTransform(object): def test_arguments(self): """Test that transform has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker.transform, expected_arguments=["self", "X", "batch_mode"], expected_default_values=(False,), ) - def test_super_transform_called(self, mocker): + def test_super_transform_called(self, df, mocker): """Test super transform is called by the transform method.""" x = InputChecker( @@ -2097,8 +1953,6 @@ def test_super_transform_called(self, mocker): datetime_columns=["d"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2121,11 +1975,9 @@ def test_super_transform_called(self, mocker): spy.call_count == 1 ), "unexpected number of calls to tubular.base.BaseTransformer.transform with transform" - def test_transform_returns_df(self): + def test_transform_returns_df(self, df): """Test fit returns df""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2148,11 +2000,9 @@ def test_transform_returns_df(self): df ), "Returned value from InputChecker.transform not as expected." - def test_batch_mode_transform_returns_df(self): + def test_batch_mode_transform_returns_df(self, df): """Test fit returns df""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2175,13 +2025,13 @@ def test_batch_mode_transform_returns_df(self): df ), "Returned value from InputChecker.transform not as expected." - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=df, actual=df_transformed, msg="Returned df of passed rows from InputChecker.transform not as expected.", ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=pd.DataFrame( columns=df.columns.values.tolist() + ["failed_checks"] ), @@ -2189,7 +2039,7 @@ def test_batch_mode_transform_returns_df(self): msg="Returned df of failed rows from InputChecker.transform not as expected.", ) - def test_check_df_is_empty_called(self, mocker): + def test_check_df_is_empty_called(self, df, mocker): """Test check is df empty is called by the transform method.""" x = InputChecker( @@ -2199,8 +2049,6 @@ def test_check_df_is_empty_called(self, mocker): datetime_columns=["d"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2228,13 +2076,13 @@ def test_check_df_is_empty_called(self, mocker): expected_pos_args_0 = (x, "scoring dataframe", df) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=expected_pos_args_0, actual=call_0_pos_args, msg="positional args unexpected in _df_is_empty call for scoring dataframe argument", ) - def test_non_optional_transforms_always_called(self, mocker): + def test_non_optional_transforms_always_called(self, df, mocker): """Test non-optional checks are called by the transform method irrespective of categorical_columns, numerical_columns & datetime_columns values.""" @@ -2242,8 +2090,6 @@ def test_non_optional_transforms_always_called(self, mocker): numerical_columns=None, categorical_columns=None, datetime_columns=None ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2278,15 +2124,13 @@ def test_non_optional_transforms_always_called(self, mocker): "categorical_columns set to None " ) - def test_optional_transforms_not_called(self, mocker): + def test_optional_transforms_not_called(self, df, mocker): """Test optional checks are not called by the transform method.""" x = InputChecker( numerical_columns=None, categorical_columns=None, datetime_columns=None ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2327,14 +2171,12 @@ def test_optional_transforms_not_called(self, mocker): spy_datetime.call_count == 0 ), "unexpected number of calls to _transform_datetime_checker with transform when datetime_columns set to None" - def test_raise_exception_if_checks_fail_called_no_optionals(self, mocker): + def test_raise_exception_if_checks_fail_called_no_optionals(self, df, mocker): """Test raise exception is called by the transform method when categorical, numerical_& datetime columns set to None.""" x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) spy = mocker.spy( @@ -2369,7 +2211,7 @@ def test_raise_exception_if_checks_fail_called_no_optionals(self, mocker): expected_pos_args_0 == call_0_pos_args ), "positional args unexpected in raise_exception_if_checks_fail call in transform method" - def test_raise_exception_if_checks_fail_called_all_checks(self, mocker): + def test_raise_exception_if_checks_fail_called_all_checks(self, df, mocker): """Test raise exception is called by the transform method when categorical_columns and numerical_columns set to None.""" @@ -2379,8 +2221,6 @@ def test_raise_exception_if_checks_fail_called_all_checks(self, mocker): datetime_columns=["d"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2427,14 +2267,12 @@ def test_raise_exception_if_checks_fail_called_all_checks(self, mocker): expected_pos_args_0 == call_0_pos_args ), "positional args unexpected in raise_exception_if_checks_fail call in transform method" - def test_separate_passes_and_fails_called_no_optionals(self, mocker): + def test_separate_passes_and_fails_called_no_optionals(self, df, mocker): """Test raise exception is called by the transform method when categorical, numerical_& datetime columns set to None.""" x = InputChecker() - df = data_generators_p.create_df_2() - orig_df = df.copy(deep=True) x.fit(df) @@ -2468,13 +2306,13 @@ def test_separate_passes_and_fails_called_no_optionals(self, mocker): orig_df, ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=expected_pos_args_0, actual=call_0_pos_args, msg="positional args unexpected in separate_passes_and_fails call in transform method", ) - def test_separate_passes_and_fails_called_all_checks(self, mocker): + def test_separate_passes_and_fails_called_all_checks(self, df, mocker): """Test raise exception is called by the transform method when categorical_columns and numerical_columns set to None.""" @@ -2484,8 +2322,6 @@ def test_separate_passes_and_fails_called_all_checks(self, mocker): datetime_columns=["d"], ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2531,7 +2367,7 @@ def test_separate_passes_and_fails_called_all_checks(self, mocker): orig_df, ) - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=expected_pos_args_0, actual=call_0_pos_args, msg="positional args unexpected in separate_passes_and_fails call in transform method", @@ -2543,7 +2379,7 @@ class TestRaiseExceptionIfChecksFail(object): def test_arguments(self): """Test that raise_exception_if_checks_fail has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker.raise_exception_if_checks_fail, expected_arguments=[ "self", @@ -2556,24 +2392,20 @@ def test_arguments(self): expected_default_values=None, ) - def test_no_failed_checks_before_transform(self): + def test_no_failed_checks_before_transform(self, df): """Test validation_failed_checks is not present before transform""" x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) assert ( hasattr(x, "validation_failed_checks") is False ), "validation_failed_checks attribute present before transform" - def test_validation_failed_checks_saved(self): + def test_validation_failed_checks_saved(self, df): """Test raise_exception_if_checks_fail saves the validation results""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -2588,11 +2420,9 @@ def test_validation_failed_checks_saved(self): x.validation_failed_checks, dict ), f"incorrect validation results type identified - expected: dict but got: {type(x.validation_failed_checks)}" - def test_correct_validation_failed_checks(self): + def test_correct_validation_failed_checks(self, df): """Test raise_exception_if_checks_fail saves and prints the correct error message""" - df = data_generators_p.create_df_2() - x = InputChecker() x.fit(df) @@ -2623,13 +2453,11 @@ def test_correct_validation_failed_checks(self): x.validation_failed_checks["Exception message"], str ), f"incorrect exception message type identified - expected: str but got: {type(x.validation_failed_checks['Exception message'])}" - def test_input_checker_error_raised_type(self): + def test_input_checker_error_raised_type(self, df): """Test InputCheckerError is raised if type test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "a"] = "a" @@ -2637,13 +2465,11 @@ def test_input_checker_error_raised_type(self): with pytest.raises(InputCheckerError): df = x.transform(df) - def test_input_checker_error_raised_nulls(self): + def test_input_checker_error_raised_nulls(self, df): """Test InputCheckerError is raised if null test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - df["b"] = df["b"].fillna("a") x = InputChecker() @@ -2655,13 +2481,11 @@ def test_input_checker_error_raised_nulls(self): with pytest.raises(InputCheckerError): df = x.transform(df) - def test_input_checker_error_raised_categorical(self): + def test_input_checker_error_raised_categorical(self, df): """Test InputCheckerError is raised if categorical test fails""" x = InputChecker(categorical_columns=["b"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "b"] = "u" @@ -2669,13 +2493,11 @@ def test_input_checker_error_raised_categorical(self): with pytest.raises(InputCheckerError): df = x.transform(df) - def test_input_checker_error_raised_numerical(self): + def test_input_checker_error_raised_numerical(self, df): """Test InputCheckerError is raised if numerical test fails""" x = InputChecker(numerical_columns=["a"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[0, "a"] = -1 @@ -2683,11 +2505,9 @@ def test_input_checker_error_raised_numerical(self): with pytest.raises(InputCheckerError): df = x.transform(df) - def test_input_checker_error_raised_datetime(self): + def test_input_checker_error_raised_datetime(self, df): """Test InputCheckerError is raised if datetime test fails""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2713,13 +2533,11 @@ def test_input_checker_error_raised_datetime(self): with pytest.raises(InputCheckerError): df = x.transform(df) - def test_validation_failed_checks_correctly_stores_fails(self): + def test_validation_failed_checks_correctly_stores_fails(self, df): """Test correct data is saved in validation_failed_checks after a failed check exception""" x = InputChecker() - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -2794,7 +2612,7 @@ class TestSeparatePassAndFails(object): def test_arguments(self): """Test that separate_passes_and_fails has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker.separate_passes_and_fails, expected_arguments=[ "self", @@ -2808,13 +2626,11 @@ def test_arguments(self): expected_default_values=None, ) - def test_input_checker_type_errors_shape(self): + def test_input_checker_type_errors_shape(self, df): """Test correct dataframes are returned if type test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "a"] = "a" @@ -2837,13 +2653,11 @@ def test_input_checker_type_errors_shape(self): df.shape[1] + 1 ), f"Wrong number of columns for bad dataframe. Was expecting {df.shape[1]+1}, instead returned {bad_df.shape[1]}" - def test_input_checker_type_errors_column(self): + def test_input_checker_type_errors_column(self, df): """Test correct error column message is returned if type test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "a"] = "a" @@ -2867,13 +2681,11 @@ def test_input_checker_type_errors_column(self): actual[0] == expected ), f"Values in failed_checks not as expected: actual: {actual} expected: {expected}" - def test_input_checker_null_errors_shape(self): + def test_input_checker_null_errors_shape(self, df): """Test correct dataframes are returned if null test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - df["b"] = df["b"].fillna("a") x.fit(df) @@ -2900,13 +2712,11 @@ def test_input_checker_null_errors_shape(self): df.shape[1] + 1 ), f"Wrong number of columns for bad dataframe. Was expecting {df.shape[1]+1}, instead returned {bad_df.shape[1]}" - def test_input_checker_null_errors_column(self): + def test_input_checker_null_errors_column(self, df): """Test correct error column message is returned if null test fails""" x = InputChecker() - df = data_generators_p.create_df_2() - df["b"] = df["b"].fillna("a") x.fit(df) @@ -2923,15 +2733,15 @@ def test_input_checker_null_errors_column(self): expected = "Failed null check for column: b" - h.assert_equal_msg(message, expected, "Value in Reason Failed not as expected") + ta.equality.assert_equal_msg( + message, expected, "Value in Reason Failed not as expected" + ) - def test_input_checker_categorical_errors_shape(self): + def test_input_checker_categorical_errors_shape(self, df): """Test correct dataframes are returned if categorical test fails""" x = InputChecker(categorical_columns=["b"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "b"] = "u" @@ -2956,13 +2766,11 @@ def test_input_checker_categorical_errors_shape(self): df.shape[1] + 1 ), f"Wrong number of columns for bad dataframe. Was expecting {df.shape[1]+1}, instead returned {bad_df.shape[1]}" - def test_input_checker_categorical_errors_column(self): + def test_input_checker_categorical_errors_column(self, df): """Test correct error column message is returned if categorical test fails""" x = InputChecker(categorical_columns=["b"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[5, "b"] = "u" @@ -2977,15 +2785,15 @@ def test_input_checker_categorical_errors_column(self): expected = "Failed categorical check for column: b. Unexpected values are ['u']" - h.assert_equal_msg(message, expected, "Value in failed_checks not as expected") + ta.equality.assert_equal_msg( + message, expected, "Value in failed_checks not as expected" + ) - def test_input_checker_numerical_errors_shape(self): + def test_input_checker_numerical_errors_shape(self, df): """Test correct dataframes are returned if numerical test fails""" x = InputChecker(numerical_columns=["a"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[0, "a"] = -1 @@ -3010,13 +2818,11 @@ def test_input_checker_numerical_errors_shape(self): df.shape[1] + 1 ), f"Wrong number of columns for bad dataframe. Was expecting {df.shape[1]+1}, instead returned {bad_df.shape[1]}" - def test_input_checker_numerical_errors_column(self): + def test_input_checker_numerical_errors_column(self, df): """Test correct error column message is returned if numerical test fails""" x = InputChecker(numerical_columns=["a"]) - df = data_generators_p.create_df_2() - x.fit(df) df.loc[0, "a"] = -1 @@ -3031,13 +2837,13 @@ def test_input_checker_numerical_errors_column(self): expected = "Failed minimum value check for column: a; Value below minimum: -1.0" - h.assert_equal_msg(message, expected, "Value in Reason Fails not as expected") + ta.equality.assert_equal_msg( + message, expected, "Value in Reason Fails not as expected" + ) - def test_input_checker_datetime_errors_shape(self): + def test_input_checker_datetime_errors_shape(self, df): """Test correct dataframes are returned if datetime test fails""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -3082,11 +2888,9 @@ def test_input_checker_datetime_errors_shape(self): df.shape[1] + 1 ), f"Wrong number of columns for bad dataframe. Was expecting {df.shape[1]+1}, instead returned {bad_df.shape[1]}" - def test_input_checker_datetime_errors_column(self): + def test_input_checker_datetime_errors_column(self, df): """Test correct error column message is returned if numerical test fails""" - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -3125,22 +2929,20 @@ def test_input_checker_datetime_errors_column(self): "Failed minimum value check for column: d; Value below minimum: 2017-09-13" ) - h.assert_equal_msg( + ta.equality.assert_equal_msg( message_0, expected_0, "Value in Reason Failed not as expected" ) - h.assert_equal_msg( + ta.equality.assert_equal_msg( message_1, expected_1, "Value in Reason Failed not as expected" ) - def test_full_failed_checks(self): + def test_full_failed_checks(self, df): """Test correct data is outputted for multiple failed exceptions""" x = InputChecker( numerical_columns=["a"], datetime_columns=["d"], categorical_columns=["b"] ) - df = data_generators_p.create_df_2() - df["d"] = pd.to_datetime( [ "01/02/2020", @@ -3193,25 +2995,25 @@ def test_full_failed_checks(self): expected_msg_5 = "Failed null check for column: b" - h.assert_equal_msg( + ta.equality.assert_equal_msg( bad_df["failed_checks"].loc[0], expected_msg_0, "Wrong message in reason failed for index 0", ) - h.assert_equal_msg( + ta.equality.assert_equal_msg( bad_df["failed_checks"].loc[2], expected_msg_2, "Wrong message in reason failed for index 2", ) - h.assert_equal_msg( + ta.equality.assert_equal_msg( bad_df["failed_checks"].loc[4], expected_msg_4, "Wrong message in reason failed for index 4", ) - h.assert_equal_msg( + ta.equality.assert_equal_msg( bad_df["failed_checks"].loc[5], expected_msg_5, "Wrong message in reason failed for index 5", @@ -3224,8 +3026,7 @@ def test_multiple_value_error_fails_on_same_row(self): df = pd.DataFrame({"col1": ["a", "b", "c"], "col2": ["a", "b", "c"]}) checker = InputChecker( - columns=["col1", "col2"], - categorical_columns=["col1", "col2"], + columns=["col1", "col2"], categorical_columns=["col1", "col2"], ) checker.fit(df) @@ -3240,7 +3041,7 @@ def test_multiple_value_error_fails_on_same_row(self): 1 ], "Wrong rows in bad_df when a row fails multiple value checks" - h.assert_equal_msg( + ta.equality.assert_equal_msg( bad_df["failed_checks"].loc[1], expected_msg, "Wrong message in reason failed when a row fails multiple value checks", @@ -3252,7 +3053,7 @@ class TestUpdateBadDF(object): def test_arguments(self): """Test that _update_bad_df has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._update_bad_df, expected_arguments=[ "self", @@ -3264,19 +3065,17 @@ def test_arguments(self): expected_default_values=(None,), ) - def test_expected_output(self): + def test_expected_output(self, df): """Test that _update_bad_df works as expected.""" x = InputChecker(numerical_columns=["u"]) - df = data_generators_p.create_df_2() - df["failed_checks"] = "fail 1" bad_df = x._update_bad_df(df, [2, 4], "fail 2") # check message updated as expected - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=[ "fail 1", "fail 1", @@ -3291,7 +3090,7 @@ def test_expected_output(self): ) # check other columns unchanged - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=df, actual=bad_df[df.columns], msg="other columns have been modified by _update_bad_df", @@ -3303,7 +3102,7 @@ class TestUpdateGoodBadDF(object): def test_arguments(self): """Test that _update_good_bad_df has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._update_good_bad_df, expected_arguments=[ "self", @@ -3316,13 +3115,11 @@ def test_arguments(self): expected_default_values=(None,), ) - def test_expected_output(self): + def test_expected_output(self, df): """Test that _update_good_bad_df works as expected.""" x = InputChecker(numerical_columns=["u"]) - df = data_generators_p.create_df_2() - bad_df = df.loc[[2, 4]] good_df = df.loc[[0, 1, 3, 5, 6]] bad_df["failed_checks"] = "fail 1" @@ -3330,21 +3127,21 @@ def test_expected_output(self): good_df_up, bad_df_up = x._update_good_bad_df(good_df, bad_df, [3, 6], "fail 2") # check message in bad_df updated as expected - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=["fail 1", "fail 1", "fail 2", "fail 2"], actual=bad_df_up["failed_checks"].values.tolist(), msg="failed_checks not updated as expected by _update_good_bad_df", ) # check other columns in bad_df unchanged - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=df.loc[[2, 4, 3, 6], :], actual=bad_df_up[df.columns], msg="other columns have been modified in bad_df by _update_good_bad_df", ) # check good_df - h.assert_equal_dispatch( + ta.equality.assert_equal_dispatch( expected=df.loc[[0, 1, 5], :], actual=good_df_up, msg="wrong good_df returned by _update_good_bad_df", @@ -3356,7 +3153,7 @@ class TestCheckType(object): def test_arguments(self): """Test that _check_type has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._check_type, expected_arguments=["self", "obj", "obj_name", "options"], expected_default_values=None, @@ -3375,7 +3172,7 @@ class TestIsStringValue(object): def test_arguments(self): """Test that _check_type has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._is_string_value, expected_arguments=["self", "string", "string_name", "check_value"], expected_default_values=None, @@ -3393,19 +3190,19 @@ class TestIsSubset(object): def test_arguments(self): """Test that _is_subset has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._is_subset, expected_arguments=["self", "obj_name", "columns", "dataframe"], expected_default_values=None, ) - def test_exception(self): + def test_exception(self, df): """Test that _is_subset fails with the correct error.""" x = InputChecker(numerical_columns=["u"]) with pytest.raises(ValueError): - x.fit(data_generators_p.create_df_2()) + x.fit(df) class TestIsEmpty(object): @@ -3413,7 +3210,7 @@ class TestIsEmpty(object): def test_arguments(self): """Test that _is_empty has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._is_empty, expected_arguments=["self", "obj_name", "obj"], expected_default_values=None, @@ -3437,7 +3234,7 @@ class TestIsListedInColumns(object): def test_arguments(self): """Test that _is_empty has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._is_listed_in_columns, expected_arguments=["self"], expected_default_values=None, @@ -3493,7 +3290,7 @@ class TestDfIsEmpty(object): def test_arguments(self): """Test that _df_is_empty has expected arguments.""" - h.test_function_arguments( + ta.functions.test_function_arguments( func=InputChecker._df_is_empty, expected_arguments=["self", "obj_name", "df"], expected_default_values=None,