Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,37 @@
from typing import Tuple

import pandas as pd
import scipy
from scipy.stats import anderson_ksamp

from evidently.legacy.calculations.stattests.registry import StatTest
from evidently.legacy.calculations.stattests.registry import register_stattest
from evidently.legacy.core import ColumnType

# `anderson_ksamp` changed its calling convention in scipy 1.17: the `midrank`
# keyword is deprecated in favour of `variant=`, and passing `variant` makes
# the function return a result object exposing `pvalue` rather than the old
# 3-tuple. Calling the new API on scipy>=1.17 avoids the DeprecationWarning
# from issue #1534. pyproject.toml allows scipy as old as 1.10, so the legacy
# tuple path has to stay available for older installs.
#
# Only the first two dot-separated components (major, minor) are parsed, and
# any component that is not purely numeric is dropped.
_SCIPY_VERSION: Tuple[int, ...] = tuple(map(int, filter(str.isdigit, scipy.__version__.split(".")[:2])))
_USE_VARIANT_KWARG = (1, 17) <= _SCIPY_VERSION


def _anderson_darling(
    reference_data: pd.Series,
    current_data: pd.Series,
    feature_type: ColumnType,
    threshold: float,
) -> Tuple[float, bool]:
    """Run the k-sample Anderson-Darling test on two samples.

    Args:
        reference_data: first sample passed to ``anderson_ksamp``.
        current_data: second sample passed to ``anderson_ksamp``.
        feature_type: column type of the feature; not used by this function.
        threshold: value the p-value is compared against.

    Returns:
        A ``(p_value, flag)`` pair where ``flag`` is ``p_value < threshold``.
    """
    samples = [reference_data.values, current_data.values]
    # anderson_ksamp is called exactly once, through whichever API matches the
    # installed scipy. An unconditional legacy-style call before this branch
    # would do the work twice and would use the deprecated call form on
    # scipy>=1.17.
    if _USE_VARIANT_KWARG:
        # New scipy API: returns a result object with a `.pvalue` attribute.
        result = anderson_ksamp(samples, variant="midrank")
        p_value = result.pvalue
    else:
        # Legacy 3-tuple: (statistic, critical_values, significance_level).
        p_value = anderson_ksamp(samples)[2]
    return p_value, p_value < threshold


Expand Down
17 changes: 17 additions & 0 deletions tests/stattests/test_stattests.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,23 @@ def test_anderson_darling() -> None:
assert anderson_darling_test.func(reference, current, "num", 0.001) == (approx(0.0635, abs=1e-3), False)


def test_anderson_darling_no_scipy_deprecation_warning() -> None:
    """Guard against the scipy warning described in issue #1534.

    scipy>=1.17 deprecated the implicit `midrank` default of anderson_ksamp.
    Running the Evidently stat test must not surface any warning whose message
    mentions `midrank` or `variant`.
    """
    import warnings

    reference = pd.Series([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
    current = pd.Series([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])

    # Record every warning, including ones Python would normally show only once.
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always")
        anderson_darling_test.func(reference, current, "num", 0.001)

    offending_messages = []
    for warning in recorded:
        text = str(warning.message)
        if "midrank" in text or "variant" in text:
            offending_messages.append(text)

    assert not offending_messages, (
        f"anderson_ksamp emitted a midrank/variant DeprecationWarning: {offending_messages}"
    )


def test_g_test() -> None:
reference = pd.Series(["a", "b", "c"]).repeat([5, 5, 8])
current = pd.Series(["a", "b", "c"]).repeat([4, 6, 8])
Expand Down