From 4d3402db0d7ac0aabf0b480d68428019d3081d1b Mon Sep 17 00:00:00 2001 From: jbbqqf Date: Sun, 10 May 2026 00:50:43 +0200 Subject: [PATCH] fix(stattests): silence scipy>=1.17 anderson_ksamp midrank deprecation (#1534) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scipy 1.17 deprecated the implicit "midrank" default of anderson_ksamp in favour of the `variant=` keyword. Calling anderson_ksamp(samples) without `variant=` emits a DeprecationWarning per call, which fails CI in projects that promote DeprecationWarnings to errors (the case reported in issue #1534). Pass `variant="midrank"` (matching the legacy default) on scipy>=1.17 and use the new result object's .pvalue attribute. Fall back to the old 3-tuple shape on scipy<1.17 — required because pyproject.toml floors scipy at 1.10. Add a regression test that runs anderson_darling_test under warnings.catch_warnings and asserts no midrank/variant warning is emitted. Co-Authored-By: Claude Opus 4.7 --- .../stattests/anderson_darling_stattest.py | 19 ++++++++++++++++++- tests/stattests/test_stattests.py | 17 +++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/evidently/legacy/calculations/stattests/anderson_darling_stattest.py b/src/evidently/legacy/calculations/stattests/anderson_darling_stattest.py index 5fb9740ee4..b72602a77c 100644 --- a/src/evidently/legacy/calculations/stattests/anderson_darling_stattest.py +++ b/src/evidently/legacy/calculations/stattests/anderson_darling_stattest.py @@ -26,12 +26,22 @@ from typing import Tuple import pandas as pd +import scipy from scipy.stats import anderson_ksamp from evidently.legacy.calculations.stattests.registry import StatTest from evidently.legacy.calculations.stattests.registry import register_stattest from evidently.legacy.core import ColumnType +# scipy>=1.17 deprecates the `midrank` keyword in favour of `variant=`. When +# `variant` is supplied the return object is no longer a 3-tuple and exposes +# `pvalue` instead. Use the new API on scipy>=1.17 to silence the +# DeprecationWarning reported in issue #1534, and fall back to the legacy +# tuple shape on older scipy. The minimum supported scipy is 1.10 per +# pyproject.toml, so the fallback path is required. +_SCIPY_VERSION: Tuple[int, ...] = tuple(int(p) for p in scipy.__version__.split(".")[:2] if p.isdigit()) +_USE_VARIANT_KWARG = _SCIPY_VERSION >= (1, 17) + def _anderson_darling( reference_data: pd.Series, @@ -39,7 +49,14 @@ def _anderson_darling( feature_type: ColumnType, threshold: float, ) -> Tuple[float, bool]: - p_value = anderson_ksamp([reference_data.values, current_data.values])[2] + samples = [reference_data.values, current_data.values] + if _USE_VARIANT_KWARG: + # New scipy API: returns a result object with a `.pvalue` attribute. + result = anderson_ksamp(samples, variant="midrank") + p_value = result.pvalue + else: + # Legacy 3-tuple: (statistic, critical_values, significance_level). + p_value = anderson_ksamp(samples)[2] return p_value, p_value < threshold diff --git a/tests/stattests/test_stattests.py b/tests/stattests/test_stattests.py index 33ed818466..3225938457 100644 --- a/tests/stattests/test_stattests.py +++ b/tests/stattests/test_stattests.py @@ -110,6 +110,23 @@ def test_anderson_darling() -> None: assert anderson_darling_test.func(reference, current, "num", 0.001) == (approx(0.0635, abs=1e-3), False) +def test_anderson_darling_no_scipy_deprecation_warning() -> None: + """Regression test for issue #1534: scipy>=1.17 deprecated the implicit + `midrank` default of anderson_ksamp. Calling the Evidently stat test + must not emit a DeprecationWarning about it.""" + import warnings + + reference = pd.Series([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]) + current = pd.Series([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + anderson_darling_test.func(reference, current, "num", 0.001) + midrank_warnings = [w for w in caught if "midrank" in str(w.message) or "variant" in str(w.message)] + assert not midrank_warnings, ( + "anderson_ksamp emitted a midrank/variant DeprecationWarning: " f"{[str(w.message) for w in midrank_warnings]}" + ) + + def test_g_test() -> None: reference = pd.Series(["a", "b", "c"]).repeat([5, 5, 8]) current = pd.Series(["a", "b", "c"]).repeat([4, 6, 8])