From 3799ce24c9ed4693ae853146555808444d878878 Mon Sep 17 00:00:00 2001
From: pavle-martinovic_data <pavle.martinovic@databricks.com>
Date: Fri, 23 Jan 2026 17:21:45 +0100
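Subject: [PATCH] Fix generating custom cutoffs for quarters

Month-based offsets clamp to the end of the month, so subtracting the
horizon from the last date is not the inverse of adding it to a cutoff.
For example, 2020-09-30 minus 6 months is 2020-03-30, which wrongly
rejects a 2020-03-31 cutoff even though 2020-03-31 plus 6 months is
exactly 2020-09-30. Compare `cutoff + horizon` against the last date
instead, both when generating custom cutoffs and when validating them
in cross-validation.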
---
 .../forecast/prophet/diagnostics.py           |  8 +++---
 .../automl_runtime/forecast/utils.py          |  4 +--
 .../forecast/prophet/diagnostics_test.py      | 27 +++++++++++++++++++
 .../automl_runtime/forecast/utils_test.py     |  7 +++++
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/runtime/databricks/automl_runtime/forecast/prophet/diagnostics.py b/runtime/databricks/automl_runtime/forecast/prophet/diagnostics.py
index 0dd0651e..e7baf3aa 100644
--- a/runtime/databricks/automl_runtime/forecast/prophet/diagnostics.py
+++ b/runtime/databricks/automl_runtime/forecast/prophet/diagnostics.py
@@ -106,10 +106,10 @@ def map(self, func, *iterables):
     # add validation of the cutoff to make sure that the min cutoff is strictly greater than the min date in the history
     if min(cutoffs) <= df['ds'].min(): 
         raise ValueError("Minimum cutoff value is not strictly greater than min date in history")
-    # max value of cutoffs is <= (end date minus horizon)
-    end_date_minus_horizon = df['ds'].max() - horizon 
-    if max(cutoffs) > end_date_minus_horizon: 
-        raise ValueError("Maximum cutoff value is greater than end date minus horizon, no value for cross-validation remaining")
+    # max cutoff + horizon must be <= max date in history (add, don't subtract: month offsets clamp at month end)
+    end_date = df['ds'].max()
+    if max(cutoffs) + horizon > end_date:
+        raise ValueError("Maximum cutoff plus horizon exceeds end date, no value for cross-validation remaining")
     initial = cutoffs[0] - df['ds'].min()
         
     # Check if the initial window 
diff --git a/runtime/databricks/automl_runtime/forecast/utils.py b/runtime/databricks/automl_runtime/forecast/utils.py
index 496f3862..e16fd4fc 100644
--- a/runtime/databricks/automl_runtime/forecast/utils.py
+++ b/runtime/databricks/automl_runtime/forecast/utils.py
@@ -219,8 +219,8 @@ def generate_custom_cutoffs(df: pd.DataFrame, horizon: int, frequency_unit: str,
     # First cutoff is the cutoff bewteen splits
     cutoff = split_cutoff
     result = []
-    max_cutoff = max(df["ds"]) - horizon_dateoffset
-    while cutoff <= max_cutoff:
+    max_cutoff = max(df["ds"])
+    while cutoff + horizon_dateoffset <= max_cutoff:
         # If data does not exist in data range (cutoff, cutoff + horizon_dateoffset]
         if (not (((df["ds"] > cutoff) & (df["ds"] <= cutoff + horizon_dateoffset)).any())):
             # Next cutoff point is "next date after cutoff in data - horizon_dateoffset"
diff --git a/runtime/tests/automl_runtime/forecast/prophet/diagnostics_test.py b/runtime/tests/automl_runtime/forecast/prophet/diagnostics_test.py
index 7b7b9245..b69b9de9 100644
--- a/runtime/tests/automl_runtime/forecast/prophet/diagnostics_test.py
+++ b/runtime/tests/automl_runtime/forecast/prophet/diagnostics_test.py
@@ -64,3 +64,30 @@ def test_cross_validation_success(self):
         df_cv = cross_validation(model, horizon=horizon, cutoffs=cutoffs)
         self.assertEqual(df_cv["ds"].tolist(), expected_ds.tolist())
         self.assertEqual(set(df_cv.columns), set(expected_cols))
+
+    def test_cross_validation_month_end_cutoff(self):
+        df = pd.DataFrame({
+            "ds": pd.to_datetime([
+                "2019-03-31",
+                "2019-06-30",
+                "2019-09-30",
+                "2019-12-31",
+                "2020-03-31",
+                "2020-06-30",
+                "2020-09-30",
+            ]),
+            "y": range(7),
+        })
+        model = Prophet(
+            yearly_seasonality=False,
+            weekly_seasonality=False,
+            daily_seasonality=False,
+        )
+        model.fit(df)
+
+        cutoffs = [pd.Timestamp("2020-03-31")]
+        horizon = pd.DateOffset(months=6)
+        df_cv = cross_validation(model, horizon=horizon, cutoffs=cutoffs)
+
+        expected_ds = df[(df["ds"] > cutoffs[0]) & (df["ds"] <= cutoffs[0] + horizon)]["ds"]
+        self.assertEqual(df_cv["ds"].tolist(), expected_ds.tolist())
diff --git a/runtime/tests/automl_runtime/forecast/utils_test.py b/runtime/tests/automl_runtime/forecast/utils_test.py
index c043da9a..d2ab3095 100644
--- a/runtime/tests/automl_runtime/forecast/utils_test.py
+++ b/runtime/tests/automl_runtime/forecast/utils_test.py
@@ -284,6 +284,13 @@ def test_generate_custom_cutoffs_success_quaterly(self):
         cutoffs = generate_custom_cutoffs(df, horizon=7, frequency_unit="QS", split_cutoff=pd.Timestamp('2020-07-12 00:00:00'))
         self.assertEqual([pd.Timestamp('2020-07-12 00:00:00'), pd.Timestamp('2020-10-12 00:00:00')], cutoffs)
 
+    def test_generate_custom_cutoffs_success_quaterly_end(self):
+        df = pd.DataFrame(
+            pd.date_range(start="2020-03-31", periods=3, freq=pd.DateOffset(months=3)), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=2, frequency_unit="QS", split_cutoff=pd.Timestamp('2020-03-31 00:00:00'))
+        self.assertEqual([pd.Timestamp('2020-03-31 00:00:00')], cutoffs)
+
     def test_generate_custom_cutoffs_success_annualy(self):
         df = pd.DataFrame(
             pd.date_range(start="2012-07-14", periods=10, freq=pd.DateOffset(years=1)), columns=["ds"]