Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ def map(self, func, *iterables):
# add validation of the cutoff to make sure that the min cutoff is strictly greater than the min date in the history
if min(cutoffs) <= df['ds'].min():
raise ValueError("Minimum cutoff value is not strictly greater than min date in history")
# max value of cutoffs is <= (end date minus horizon)
end_date_minus_horizon = df['ds'].max() - horizon
if max(cutoffs) > end_date_minus_horizon:
raise ValueError("Maximum cutoff value is greater than end date minus horizon, no value for cross-validation remaining")
# max cutoff plus horizon must be <= max date in history
end_date = df['ds'].max()
if max(cutoffs) + horizon > end_date:
raise ValueError("Maximum cutoff plus horizon exceeds end date, no value for cross-validation remaining")
initial = cutoffs[0] - df['ds'].min()

# Check if the initial window
Expand Down
4 changes: 2 additions & 2 deletions runtime/databricks/automl_runtime/forecast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def generate_custom_cutoffs(df: pd.DataFrame, horizon: int, frequency_unit: str,
# First cutoff is the cutoff between splits
Copy link

Copilot AI Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected spelling of 'bewteen' to 'between'.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will fix in followup PR

cutoff = split_cutoff
result = []
max_cutoff = max(df["ds"]) - horizon_dateoffset
while cutoff <= max_cutoff:
max_cutoff = max(df["ds"])
while cutoff + horizon_dateoffset <= max_cutoff:
# If data does not exist in data range (cutoff, cutoff + horizon_dateoffset]
if (not (((df["ds"] > cutoff) & (df["ds"] <= cutoff + horizon_dateoffset)).any())):
# Next cutoff point is "next date after cutoff in data - horizon_dateoffset"
Expand Down
27 changes: 27 additions & 0 deletions runtime/tests/automl_runtime/forecast/prophet/diagnostics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,30 @@ def test_cross_validation_success(self):
df_cv = cross_validation(model, horizon=horizon, cutoffs=cutoffs)
self.assertEqual(df_cv["ds"].tolist(), expected_ds.tolist())
self.assertEqual(set(df_cv.columns), set(expected_cols))

def test_cross_validation_month_end_cutoff(self):
    """Cross-validate with a month-end cutoff and a six-month DateOffset horizon.

    The history holds seven quarterly observations stamped on quarter-end
    dates. With a single cutoff at 2020-03-31, the ``ds`` column of the
    cross-validation output must equal the history dates that fall in
    the half-open window (cutoff, cutoff + horizon].
    """
    quarter_ends = pd.to_datetime([
        "2019-03-31",
        "2019-06-30",
        "2019-09-30",
        "2019-12-31",
        "2020-03-31",
        "2020-06-30",
        "2020-09-30",
    ])
    df = pd.DataFrame({"ds": quarter_ends, "y": range(7)})

    # Fit with every built-in seasonality switched off.
    model = Prophet(
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    )
    model.fit(df)

    cutoff = pd.Timestamp("2020-03-31")
    horizon = pd.DateOffset(months=6)
    df_cv = cross_validation(model, horizon=horizon, cutoffs=[cutoff])

    # Expected forecast dates: strictly after the cutoff, up to and
    # including cutoff + horizon.
    window_end = cutoff + horizon
    in_window = (df["ds"] > cutoff) & (df["ds"] <= window_end)
    expected_ds = df.loc[in_window, "ds"]
    self.assertEqual(df_cv["ds"].tolist(), expected_ds.tolist())
7 changes: 7 additions & 0 deletions runtime/tests/automl_runtime/forecast/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,13 @@ def test_generate_custom_cutoffs_success_quaterly(self):
cutoffs = generate_custom_cutoffs(df, horizon=7, frequency_unit="QS", split_cutoff=pd.Timestamp('2020-07-12 00:00:00'))
self.assertEqual([pd.Timestamp('2020-07-12 00:00:00'), pd.Timestamp('2020-10-12 00:00:00')], cutoffs)

def test_generate_custom_cutoffs_success_quaterly_end(self):
Copy link

Copilot AI Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected spelling of 'quaterly' to 'quarterly'.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo is in other places too. Will fix in followup PR.

df = pd.DataFrame(
pd.date_range(start="2020-03-31", periods=3, freq=pd.DateOffset(months=3)), columns=["ds"]
).rename_axis("y").reset_index()
cutoffs = generate_custom_cutoffs(df, horizon=2, frequency_unit="QS", split_cutoff=pd.Timestamp('2020-03-31 00:00:00'))
self.assertEqual([pd.Timestamp('2020-03-31 00:00:00')], cutoffs)

def test_generate_custom_cutoffs_success_annualy(self):
df = pd.DataFrame(
pd.date_range(start="2012-07-14", periods=10, freq=pd.DateOffset(years=1)), columns=["ds"]
Expand Down
Loading