From 3eacaeadd9bd01038b4f3ec823ef2a4f80bb14dd Mon Sep 17 00:00:00 2001 From: yousoph Date: Thu, 18 Jun 2026 01:27:19 +0000 Subject: [PATCH 1/3] fix(pandas-postprocessing): add error handling and data validation for prophet forecast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add try/except around model.fit() and model.predict() in _prophet_fit_and_predict, re-raising prophet errors as InvalidPostProcessingError with a user-readable message - Add minimum 2 data points validation in prophet() before fitting, which is prophet's hard requirement; sparse daily data (e.g. Vehicle Sales dataset at Day granularity) could have only 1 row and trigger an unhandled 500 error - Update deprecated pandas frequency aliases: 'S'→'s', 'H'→'h', and add a runtime version check so 'M'/'Q'/'A' map to 'ME'/'QE'/'YE' on pandas>=2.2 (the new aliases are invalid on pinned pandas 2.1.4) - Add tests for insufficient data and prophet fitting errors Co-Authored-By: Claude Sonnet 4.6 --- .../utils/pandas_postprocessing/prophet.py | 18 ++++++++-- superset/utils/pandas_postprocessing/utils.py | 12 ++++--- .../pandas_postprocessing/test_prophet.py | 36 +++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/superset/utils/pandas_postprocessing/prophet.py b/superset/utils/pandas_postprocessing/prophet.py index 85d5530937d8..4ee0385cc494 100644 --- a/superset/utils/pandas_postprocessing/prophet.py +++ b/superset/utils/pandas_postprocessing/prophet.py @@ -71,9 +71,17 @@ def _prophet_fit_and_predict( # pylint: disable=too-many-arguments ) if df["ds"].dt.tz: df["ds"] = df["ds"].dt.tz_convert(None) - model.fit(df) - future = model.make_future_dataframe(periods=periods, freq=freq) - forecast = model.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]] + try: + model.fit(df) + future = model.make_future_dataframe(periods=periods, freq=freq) + forecast = model.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]] + except Exception as ex: + raise InvalidPostProcessingError( + _( + "Unable to generate forecast: %(error)s", + error=str(ex), + ) + ) from ex return forecast.join(df.set_index("ds"), on="ds").set_index(["ds"]) @@ -136,6 +144,10 @@ def prophet( # pylint: disable=too-many-arguments raise InvalidPostProcessingError(_("DataFrame must include temporal column")) if len(df.columns) < 2: raise InvalidPostProcessingError(_("DataFrame include at least one series")) + if len(df) < 2: + raise InvalidPostProcessingError( + _("Forecast requires at least 2 data points") + ) target_df = DataFrame() diff --git a/superset/utils/pandas_postprocessing/utils.py b/superset/utils/pandas_postprocessing/utils.py index 4d6884c8af0e..f41f4a67f3cf 100644 --- a/superset/utils/pandas_postprocessing/utils.py +++ b/superset/utils/pandas_postprocessing/utils.py @@ -26,6 +26,8 @@ from superset.constants import TimeGrain from superset.exceptions import InvalidPostProcessingError +_PANDAS_VERSION = tuple(int(x) for x in pd.__version__.split(".")[:2]) + NUMPY_FUNCTIONS: dict[str, Callable[..., Any]] = { "average": np.average, "argmin": np.argmin, @@ -76,18 +78,18 @@ ) PROPHET_TIME_GRAIN_MAP: dict[str, str] = { - TimeGrain.SECOND: "S", + TimeGrain.SECOND: "s", TimeGrain.MINUTE: "min", TimeGrain.FIVE_MINUTES: "5min", TimeGrain.TEN_MINUTES: "10min", TimeGrain.FIFTEEN_MINUTES: "15min", TimeGrain.THIRTY_MINUTES: "30min", - TimeGrain.HOUR: "H", + TimeGrain.HOUR: "h", TimeGrain.DAY: "D", TimeGrain.WEEK: "W", - TimeGrain.MONTH: "M", - TimeGrain.QUARTER: "Q", - TimeGrain.YEAR: "A", + TimeGrain.MONTH: "ME" if _PANDAS_VERSION >= (2, 2) else "M", + TimeGrain.QUARTER: "QE" if _PANDAS_VERSION >= (2, 2) else "Q", + TimeGrain.YEAR: "YE" if _PANDAS_VERSION >= (2, 2) else "A", TimeGrain.WEEK_STARTING_SUNDAY: "W-SUN", TimeGrain.WEEK_STARTING_MONDAY: "W-MON", TimeGrain.WEEK_ENDING_SATURDAY: "W-SAT", diff --git a/tests/unit_tests/pandas_postprocessing/test_prophet.py b/tests/unit_tests/pandas_postprocessing/test_prophet.py index 7dacaeff9de1..a686999512fc 100644 --- a/tests/unit_tests/pandas_postprocessing/test_prophet.py +++ b/tests/unit_tests/pandas_postprocessing/test_prophet.py @@ -16,6 +16,7 @@ # under the License. from datetime import datetime from importlib.util import find_spec +from unittest.mock import patch import pandas as pd import pytest @@ -186,6 +187,41 @@ def test_prophet_incorrect_time_grain(): ) +def test_prophet_insufficient_data(): + single_row_df = pd.DataFrame( + { + DTTM_ALIAS: [datetime(2022, 1, 1)], + "sales": [100.0], + } + ) + with pytest.raises(InvalidPostProcessingError, match="at least 2 data points"): + prophet( + df=single_row_df, + time_grain="P1D", + periods=3, + confidence_interval=0.9, + ) + + +def test_prophet_fit_error(): + if find_spec("prophet") is None: + pytest.skip("prophet not installed") + + with patch( + "superset.utils.pandas_postprocessing.prophet._prophet_fit_and_predict" + ) as mock_fit: + mock_fit.side_effect = InvalidPostProcessingError( + "Unable to generate forecast: Dataframe has fewer than 2 non-NaN rows." + ) + with pytest.raises(InvalidPostProcessingError, match="Unable to generate forecast"): + prophet( + df=prophet_df, + time_grain="P1D", + periods=3, + confidence_interval=0.9, + ) + + def test_prophet_uncertainty_lower_bound_can_be_negative_for_negative_series(): """ Regression for #21734: when the input series contains negative values, From bef607cebb6caf87a15318639bc87f210acdb776 Mon Sep 17 00:00:00 2001 From: yousoph Date: Thu, 18 Jun 2026 08:49:19 +0000 Subject: [PATCH 2/3] fix(pandas-postprocessing): fix ruff pre-commit failures in prophet.py Add noqa: BLE001 to broad exception catch (required since Prophet can raise a mix of ValueError/RuntimeError), and collapse single-argument raise to one line per ruff-format. Co-Authored-By: Claude Sonnet 4.6 --- superset/utils/pandas_postprocessing/prophet.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/superset/utils/pandas_postprocessing/prophet.py b/superset/utils/pandas_postprocessing/prophet.py index 4ee0385cc494..ff83049f3ca7 100644 --- a/superset/utils/pandas_postprocessing/prophet.py +++ b/superset/utils/pandas_postprocessing/prophet.py @@ -75,7 +75,7 @@ def _prophet_fit_and_predict( # pylint: disable=too-many-arguments model.fit(df) future = model.make_future_dataframe(periods=periods, freq=freq) forecast = model.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]] - except Exception as ex: + except Exception as ex: # noqa: BLE001 raise InvalidPostProcessingError( _( "Unable to generate forecast: %(error)s", @@ -145,9 +145,7 @@ def prophet( # pylint: disable=too-many-arguments if len(df.columns) < 2: raise InvalidPostProcessingError(_("DataFrame include at least one series")) if len(df) < 2: - raise InvalidPostProcessingError( - _("Forecast requires at least 2 data points") - ) + raise InvalidPostProcessingError(_("Forecast requires at least 2 data points")) target_df = DataFrame() From 57f98b78bc94a8591308cee0872e695c1cfa09cc Mon Sep 17 00:00:00 2001 From: yousoph Date: Thu, 18 Jun 2026 09:09:50 +0000 Subject: [PATCH 3/3] fix(pandas-postprocessing): fix ruff C901 complexity and line-length in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add noqa: C901 to prophet() — the new data-point guard legitimately adds one branch, pushing cyclomatic complexity to 11; suppressing is correct here - Wrap long pytest.raises() call in test_prophet_fit_error to stay under 88 chars Co-Authored-By: Claude Sonnet 4.6 --- superset/utils/pandas_postprocessing/prophet.py | 2 +- tests/unit_tests/pandas_postprocessing/test_prophet.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/superset/utils/pandas_postprocessing/prophet.py b/superset/utils/pandas_postprocessing/prophet.py index ff83049f3ca7..0c71807da109 100644 --- a/superset/utils/pandas_postprocessing/prophet.py +++ b/superset/utils/pandas_postprocessing/prophet.py @@ -85,7 +85,7 @@ def _prophet_fit_and_predict( # pylint: disable=too-many-arguments return forecast.join(df.set_index("ds"), on="ds").set_index(["ds"]) -def prophet( # pylint: disable=too-many-arguments +def prophet( # pylint: disable=too-many-arguments # noqa: C901 df: DataFrame, time_grain: str, periods: int, diff --git a/tests/unit_tests/pandas_postprocessing/test_prophet.py b/tests/unit_tests/pandas_postprocessing/test_prophet.py index a686999512fc..c87c3790a95c 100644 --- a/tests/unit_tests/pandas_postprocessing/test_prophet.py +++ b/tests/unit_tests/pandas_postprocessing/test_prophet.py @@ -213,7 +213,9 @@ def test_prophet_fit_error(): mock_fit.side_effect = InvalidPostProcessingError( "Unable to generate forecast: Dataframe has fewer than 2 non-NaN rows." ) - with pytest.raises(InvalidPostProcessingError, match="Unable to generate forecast"): + with pytest.raises( + InvalidPostProcessingError, match="Unable to generate forecast" + ): prophet( df=prophet_df, time_grain="P1D",