From 1c03d2e296318106f7a4fd71a6658eec3dfcb2b9 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Sat, 20 Dec 2025 18:43:02 +0530
Subject: [PATCH 01/21] Fix utilsforecast evaluation compatibility
Handle missing cutoff/id_col in evaluation output
Make cross-validation robust to utilsforecast schema changes
---
timecopilot/utils/experiment_handler.py | 39 ++++++++++++++++++++-----
1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 864ffe70..b51e73f4 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -11,6 +11,7 @@
from pydantic_ai.agent import AgentRunResult
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import _zero_to_nan, mae
+import numpy as np
from ..models.utils.forecaster import (
get_seasonality,
@@ -23,6 +24,10 @@
category=FutureWarning,
)
+def _zero_to_nan_pd(s: pd.Series) -> pd.Series:
+ s = s.astype(float).copy()
+ s[s == 0] = np.nan
+ return s
def mase(
df: pd.DataFrame,
@@ -31,18 +36,30 @@ def mase(
train_df: pd.DataFrame,
id_col: str = "unique_id",
target_col: str = "y",
+ **kwargs,
) -> pd.DataFrame:
- mean_abs_err = mae(df, models, id_col, target_col)
- mean_abs_err = mean_abs_err.set_index(id_col)
+ mean_abs_err = mae(df, models, id_col, target_col).set_index(id_col)
+
+ # don't divide datetime columns like 'cutoff'
+ cutoff = None
+ if "cutoff" in mean_abs_err.columns:
+ cutoff = mean_abs_err["cutoff"]
+ mean_abs_err = mean_abs_err.drop(columns=["cutoff"])
+
# assume train_df is sorted
lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
scale = train_df[target_col].sub(lagged).abs()
scale = scale.groupby(train_df[id_col], observed=True).mean()
scale[scale < 1e-2] = 0.0
- res = mean_abs_err.div(_zero_to_nan(scale), axis=0).fillna(0)
+
+ scale = _zero_to_nan_pd(scale).reindex(mean_abs_err.index) # align by id
+ res = mean_abs_err.div(scale, axis=0).fillna(0)
+
+ if cutoff is not None:
+ res.insert(0, "cutoff", cutoff)
+
res.index.name = id_col
- res = res.reset_index()
- return res
+ return res.reset_index()
def generate_train_cv_splits(
@@ -246,9 +263,15 @@ def add_id_cutoff(df: pd.DataFrame):
models=models,
id_col="id_cutoff",
)
- eval_df = eval_df.merge(cutoffs, on=["id_cutoff"])
- eval_df = eval_df.drop(columns=["id_cutoff"])
- eval_df = eval_df[["unique_id", "cutoff", "metric"] + models]
+ if "cutoff" not in eval_df.columns:
+ if "id_cutoff" in eval_df.columns:
+ eval_df = eval_df.merge(cutoffs, on="id_cutoff", how="left")
+ else:
+ pass
+
+ cols = ["unique_id", "cutoff", "metric"] + models
+ cols = [c for c in cols if c in eval_df.columns]
+ eval_df = eval_df[cols]
return eval_df
From f1bb87e0438be57fee8a6118f83f1658c5dd57fc Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Wed, 7 Jan 2026 14:09:15 +0530
Subject: [PATCH 02/21] Add cutoff_col support to mase and forward to mae
Add cutoff_col to mase and forward it to mae for correct evaluation
---
timecopilot/utils/experiment_handler.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index b51e73f4..1e1309ed 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -4,6 +4,7 @@
from functools import partial
from pathlib import Path
from typing import Any
+import numpy as np
import pandas as pd
from pydantic import BaseModel, Field
@@ -11,7 +12,6 @@
from pydantic_ai.agent import AgentRunResult
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import _zero_to_nan, mae
-import numpy as np
from ..models.utils.forecaster import (
get_seasonality,
@@ -35,28 +35,33 @@ def mase(
seasonality: int,
train_df: pd.DataFrame,
id_col: str = "unique_id",
+ time_col: str = "ds",
target_col: str = "y",
- **kwargs,
+ cutoff_col: str = "cutoff",
) -> pd.DataFrame:
- mean_abs_err = mae(df, models, id_col, target_col).set_index(id_col)
+ mean_abs_err = mae(
+ df,
+ models,
+ id_col=id_col,
+ target_col=target_col,
+ cutoff_col=cutoff_col,
+ ).set_index(id_col)
- # don't divide datetime columns like 'cutoff'
cutoff = None
- if "cutoff" in mean_abs_err.columns:
- cutoff = mean_abs_err["cutoff"]
- mean_abs_err = mean_abs_err.drop(columns=["cutoff"])
+ if cutoff_col in mean_abs_err.columns:
+ cutoff = mean_abs_err[cutoff_col]
+ mean_abs_err = mean_abs_err.drop(columns=[cutoff_col])
- # assume train_df is sorted
lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
scale = train_df[target_col].sub(lagged).abs()
scale = scale.groupby(train_df[id_col], observed=True).mean()
scale[scale < 1e-2] = 0.0
- scale = _zero_to_nan_pd(scale).reindex(mean_abs_err.index) # align by id
+ scale = _zero_to_nan_pd(scale).reindex(mean_abs_err.index)
res = mean_abs_err.div(scale, axis=0).fillna(0)
if cutoff is not None:
- res.insert(0, "cutoff", cutoff)
+ res.insert(0, cutoff_col, cutoff)
res.index.name = id_col
return res.reset_index()
From d0c4c3def59987152c6b8e3d73eab31ebab673f4 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Thu, 8 Jan 2026 20:54:47 +0530
Subject: [PATCH 03/21] removed cutoff_col from mae
---
timecopilot/utils/experiment_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 1e1309ed..847a1bed 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -44,7 +44,7 @@ def mase(
models,
id_col=id_col,
target_col=target_col,
- cutoff_col=cutoff_col,
+ # cutoff_col=cutoff_col,
).set_index(id_col)
cutoff = None
From 2c7932f27568cbf81fc0879718a35c11176495cb Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 10:53:33 +0530
Subject: [PATCH 04/21] Fix lint and styling issues
---
docs/examples/agent-quickstart.ipynb | 5 +-
...maly-detection-forecaster-quickstart.ipynb | 8 +-
docs/examples/aws-bedrock.ipynb | 14 +-
docs/examples/chronos-family.ipynb | 12 +-
docs/examples/cryptocurrency-quickstart.ipynb | 106 +-
docs/examples/forecaster-quickstart.ipynb | 16 +-
docs/examples/gift-eval.ipynb | 3516 +++++++++--------
docs/examples/llm-providers.ipynb | 19 +-
...ndation-models-comparison-quickstart.ipynb | 27 +-
timecopilot/utils/experiment_handler.py | 6 +-
10 files changed, 1869 insertions(+), 1860 deletions(-)
diff --git a/docs/examples/agent-quickstart.ipynb b/docs/examples/agent-quickstart.ipynb
index 92114456..92f97fbc 100644
--- a/docs/examples/agent-quickstart.ipynb
+++ b/docs/examples/agent-quickstart.ipynb
@@ -16,6 +16,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()"
]
},
@@ -35,6 +36,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
+ "\n",
"from timecopilot import TimeCopilot"
]
},
@@ -157,11 +159,10 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tc = TimeCopilot(\n",
" llm=\"openai:gpt-4o\",\n",
" retries=3,\n",
- ")\n"
+ ")"
]
},
{
diff --git a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
index 18f6134a..a5443bdd 100644
--- a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
+++ b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
@@ -35,6 +35,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
+ "\n",
"from timecopilot import TimeCopilotForecaster"
]
},
@@ -183,7 +184,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/the_anomaly_tour.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df\n"
+ "df"
]
},
{
@@ -201,9 +202,9 @@
"metadata": {},
"outputs": [],
"source": [
- "from timecopilot.models.stats import SeasonalNaive, Theta\n",
"from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.flowstate import FlowState"
+ "from timecopilot.models.foundation.flowstate import FlowState\n",
+ "from timecopilot.models.stats import SeasonalNaive, Theta"
]
},
{
@@ -222,7 +223,6 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index e031bab7..fcceedc8 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -37,11 +37,12 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()\n",
"\n",
- "from timecopilot import TimeCopilot\n",
+ "import pandas as pd\n",
"\n",
- "import pandas as pd"
+ "from timecopilot import TimeCopilot"
]
},
{
@@ -164,7 +165,7 @@
"outputs": [],
"source": [
"tc = TimeCopilot(\n",
- " llm='bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',\n",
+ " llm=\"bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0\",\n",
")"
]
},
@@ -187,10 +188,7 @@
"source": [
"from pydantic_ai.models.bedrock import BedrockConverseModel\n",
"\n",
- "model = BedrockConverseModel(\n",
- " 'us.anthropic.claude-3-5-sonnet-20241022-v2:0'\n",
- " \n",
- ")\n",
+ "model = BedrockConverseModel(\"us.anthropic.claude-3-5-sonnet-20241022-v2:0\")\n",
"tc = TimeCopilot(\n",
" llm=model,\n",
")"
@@ -220,7 +218,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")\n"
+ "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")"
]
},
{
diff --git a/docs/examples/chronos-family.ipynb b/docs/examples/chronos-family.ipynb
index f9bb2021..84174108 100644
--- a/docs/examples/chronos-family.ipynb
+++ b/docs/examples/chronos-family.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -241,7 +241,7 @@
"outputs": [],
"source": [
"level = [20, 40, 60, 80]\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
]
},
{
@@ -638,12 +638,14 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]), \n",
- " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
+ " cv_df.drop(columns=[\"cutoff\"]),\n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
+ " 3\n",
+ ")"
]
}
],
diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb
index 4d4a39a0..e22475b2 100644
--- a/docs/examples/cryptocurrency-quickstart.ipynb
+++ b/docs/examples/cryptocurrency-quickstart.ipynb
@@ -16,23 +16,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import sys\n",
- "\n",
"# Import all the timecopilot goodies\n",
- "import timecopilot\n",
- "from timecopilot import TimeCopilotForecaster\n",
- "from timecopilot.models.stats import SeasonalNaive, Theta\n",
- "from timecopilot.models.foundation.chronos import Chronos\n",
+ "import os\n",
"\n",
- "from timecopilot.models.prophet import Prophet\n",
- "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive\n",
- "from timecopilot.models.foundation.moirai import Moirai\n",
+ "import kagglehub\n",
+ "import numpy as np\n",
"\n",
"# Import basic libraries\n",
"import pandas as pd\n",
- "import kagglehub\n",
- "import os\n",
- "import numpy as np"
+ "\n",
+ "from timecopilot import TimeCopilotForecaster\n",
+ "from timecopilot.models.foundation.chronos import Chronos\n",
+ "from timecopilot.models.foundation.moirai import Moirai\n",
+ "from timecopilot.models.prophet import Prophet\n",
+ "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive, Theta"
]
},
{
@@ -63,7 +60,7 @@
"outputs": [],
"source": [
"files = os.listdir(path)\n",
- "files = [path+'/'+x for x in files]"
+ "files = [path + \"/\" + x for x in files]"
]
},
{
@@ -198,18 +195,18 @@
"# Read all filez and set them up to the readable structure for timecopilot\n",
"for file in files:\n",
" temp_df = pd.read_csv(file)\n",
- " temp_df = temp_df[['Symbol','Date','Close']]\n",
- " temp_df.columns = ['unique_id','ds','y']\n",
- " big_df = pd.concat([big_df,temp_df])\n",
+ " temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n",
+ " temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n",
+ " big_df = pd.concat([big_df, temp_df])\n",
"\n",
"big_df = big_df.reset_index(drop=True)\n",
"big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n",
"\n",
- "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. \n",
+ "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n",
"# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n",
- "cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n",
- "big_df=big_df[big_df.unique_id.isin(cryptos)]\n",
- "big_df=big_df.reset_index(drop=True)\n",
+ "cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n",
+ "big_df = big_df[big_df.unique_id.isin(cryptos)]\n",
+ "big_df = big_df.reset_index(drop=True)\n",
"big_df"
]
},
@@ -341,6 +338,7 @@
" df_out.loc[idx, col] = np.nan\n",
" return df_out\n",
"\n",
+ "\n",
"df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n",
"df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n",
"print(df_missing)"
@@ -709,12 +707,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
" # (anomalies_df.unique_id=='SOL') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
- " (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ " (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -954,12 +954,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
- " (anomalies_df.unique_id=='ADA') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
- " (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
+ " (anomalies_df.unique_id == \"ADA\")\n",
+ " & (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1199,12 +1201,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
- " (anomalies_df.unique_id=='ADA') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) & \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) \\\n",
- " # (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
+ " (anomalies_df.unique_id == \"ADA\")\n",
+ " & (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " # (anomalies_df['Theta-anomaly']==True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1241,12 +1245,12 @@
"source": [
"tcf1 = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(), \n",
+ " AutoARIMA(),\n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
" Theta(),\n",
- " AutoETS(), \n",
- " Moirai(), \n",
- " Prophet(), \n",
+ " AutoETS(),\n",
+ " Moirai(),\n",
+ " Prophet(),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -1259,7 +1263,7 @@
"metadata": {},
"outputs": [],
"source": [
- "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])"
+ "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])"
]
},
{
@@ -1303,9 +1307,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
- " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
- " .reset_index(drop=True)"
+ "eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
+ " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
+ "].reset_index(drop=True)"
]
},
{
@@ -1345,9 +1349,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
- " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
- " .reset_index(drop=True)"
+ "eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
+ " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
+ "].reset_index(drop=True)"
]
},
{
@@ -1515,9 +1519,9 @@
}
],
"source": [
- "compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n",
- "compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n",
- "print(compare['dif'].sum())"
+ "compare = eth_fcst_normal.merge(eth_fcst_missing, on=[\"ds\", \"unique_id\"])\n",
+ "compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n",
+ "print(compare[\"dif\"].sum())"
]
},
{
diff --git a/docs/examples/forecaster-quickstart.ipynb b/docs/examples/forecaster-quickstart.ipynb
index 6239177a..31ed2858 100644
--- a/docs/examples/forecaster-quickstart.ipynb
+++ b/docs/examples/forecaster-quickstart.ipynb
@@ -24,6 +24,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
+ "\n",
"from timecopilot import TimeCopilotForecaster"
]
},
@@ -131,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -149,9 +150,9 @@
"metadata": {},
"outputs": [],
"source": [
+ "from timecopilot.models.foundation.moirai import Moirai\n",
"from timecopilot.models.prophet import Prophet\n",
- "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive\n",
- "from timecopilot.models.foundation.moirai import Moirai"
+ "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive"
]
},
{
@@ -170,13 +171,12 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(), \n",
- " AutoETS(), \n",
- " Moirai(), \n",
- " Prophet(), \n",
+ " AutoARIMA(),\n",
+ " AutoETS(),\n",
+ " Moirai(),\n",
+ " Prophet(),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
diff --git a/docs/examples/gift-eval.ipynb b/docs/examples/gift-eval.ipynb
index 70347b4d..6273cb26 100644
--- a/docs/examples/gift-eval.ipynb
+++ b/docs/examples/gift-eval.ipynb
@@ -1,1782 +1,1784 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "azZ6BczQLj_B"
- },
- "source": [
- "# Foundation Model Ensemble (GIFT-Eval)\n",
- "\n",
- "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
- "\n",
- "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
- "\n",
- "\n",
- "\n",
- "## Model Description\n",
- "\n",
- "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
- "\n",
- "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
- "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
- "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
- "\n",
- "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
- "\n",
- "## TimeCopilot's Key Features\n",
- "\n",
- "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
- "- **Ensemble capabilities**: Built-in ensemble methods\n",
- "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
- "- **Dependency management**: Handles complex model requirements automatically\n",
- "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2SumVjnLj_C"
- },
- "source": [
- "## Requirements and Installation\n",
- "\n",
- "Install TimeCopilot library:\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "yQpa1NOvLj_D"
- },
- "outputs": [],
- "source": [
- "%pip install \"timecopilot>=0.0.22\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "tVsga7ogLj_D"
- },
- "source": [
- "## Dataset Setup\n",
- "\n",
- "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "mriqHxfOLj_D"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "\n",
- "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
- "# Handles the complete benchmark dataset with all 97 configurations\n",
- "storage_path = \"./data/gift-eval\"\n",
- "GIFTEval.download_data(storage_path=storage_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-SHX7gAtLj_D"
- },
- "source": [
- "## Model Implementation\n",
- "\n",
- "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "iWYKncn03jVy"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.ensembles.median import MedianEnsemble\n",
- "from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.timesfm import TimesFM\n",
- "from timecopilot.models.foundation.tirex import TiRex\n",
- "from timecopilot.models.utils.forecaster import Forecaster\n",
- "\n",
- "batch_size = 64\n",
- "\n",
- "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
- "# Automatically handles dependency conflicts and GPU memory management\n",
- "ensemble = MedianEnsemble(\n",
- " models=[\n",
- " # Each model uses TimeCopilot's unified interface despite different architectures\n",
- " Chronos(\n",
- " repo_id=\"amazon/chronos-2\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TimesFM(\n",
- " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TiRex(\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " ],\n",
- " alias=\"TimeCopilot\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sCjZScu5Lj_E"
- },
- "source": [
- "## Evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yPKpn4e04KZD"
- },
- "source": [
- "### Defining the evaluator"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2YcjoDF5NH7"
- },
- "source": [
- "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "RMvE9Cx9Lj_D"
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
- "\n",
- "\n",
- "def evaluate_forecaster(\n",
- " forecaster: Forecaster,\n",
- " dataset_name: str,\n",
- " term: str,\n",
- " output_path: str,\n",
- " storage_path: str,\n",
- " ):\n",
- " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
- "\n",
- " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
- " gifteval = GIFTEval(\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=output_path,\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # GluonTS wrapper for GIFT-Eval compatibility\n",
- " # It can receive any Forecaster from TimeCopilot\n",
- " predictor = GluonTSPredictor(\n",
- " forecaster=forecaster,\n",
- " max_length=4_096,\n",
- " batch_size=1_024,\n",
- " )\n",
- "\n",
- " # Run evaluation with GIFT-Eval's standardized metrics\n",
- " gifteval.evaluate_predictor(predictor, batch_size=512)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ajc2VPQl5cPY"
- },
- "source": [
- "### Performing evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "74XuerNA5rWU"
- },
- "source": [
- "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "R41M3rDeLj_E"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "\n",
- "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
- " combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- " ]\n",
- "\n",
- " for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=ensemble,\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=f\"./results/timecopilot\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # Load consolidated results in GIFT-Eval format\n",
- " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "cQ7WOUKCR_4h",
- "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
- },
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df"
- },
- "text/html": [
- "\n",
- "
\n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " TimeCopilot | \n",
- " 220437.833920 | \n",
- " 220437.833920 | \n",
- " 239.903438 | \n",
- " 1.916661 | \n",
- " 0.058617 | \n",
- " 0.058292 | \n",
- " 14.666592 | \n",
- " 469.508077 | \n",
- " 0.085537 | \n",
- " 0.043707 | \n",
- " 0.034997 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " TimeCopilot | \n",
- " 54.628522 | \n",
- " 54.628522 | \n",
- " 4.459038 | \n",
- " 0.444425 | \n",
- " 0.385657 | \n",
- " 0.580057 | \n",
- " 2.696251 | \n",
- " 7.391111 | \n",
- " 0.398400 | \n",
- " 0.240354 | \n",
- " 0.186401 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " TimeCopilot | \n",
- " 71.800877 | \n",
- " 71.800877 | \n",
- " 4.851640 | \n",
- " 0.488632 | \n",
- " 0.470714 | \n",
- " 0.757992 | \n",
- " 3.374162 | \n",
- " 8.473540 | \n",
- " 0.513086 | \n",
- " 0.293774 | \n",
- " 0.232035 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " TimeCopilot | \n",
- " 83.786483 | \n",
- " 83.786483 | \n",
- " 5.340595 | \n",
- " 0.566997 | \n",
- " 0.619369 | \n",
- " 0.782812 | \n",
- " 4.585122 | \n",
- " 9.153496 | \n",
- " 0.559122 | \n",
- " 0.326219 | \n",
- " 0.261100 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
- "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
- "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
- "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 220437.833920 239.903438 1.916661 \n",
- "1 54.628522 4.459038 0.444425 \n",
- "2 71.800877 4.851640 0.488632 \n",
- "3 83.786483 5.340595 0.566997 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.058617 0.058292 14.666592 \n",
- "1 0.385657 0.580057 2.696251 \n",
- "2 0.470714 0.757992 3.374162 \n",
- "3 0.619369 0.782812 4.585122 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 469.508077 0.085537 0.043707 \n",
- "1 7.391111 0.398400 0.240354 \n",
- "2 8.473540 0.513086 0.293774 \n",
- "3 9.153496 0.559122 0.326219 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.034997 Econ/Fin 1 \n",
- "1 0.186401 Web/CloudOps 7 \n",
- "2 0.232035 Web/CloudOps 7 \n",
- "3 0.261100 Web/CloudOps 7 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "if torch.cuda.is_available():\n",
- " from IPython.display import display\n",
- "\n",
- " display(eval_df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7JCiHenv6Dma"
- },
- "source": [
- "You can access the complete combination of datasets with the following:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "RmmMtHpA6HIu"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "2WBJ-wjv6Kz6",
- "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "DATASETS_WITH_TERMS[:3]"
- ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "azZ6BczQLj_B"
+ },
+ "source": [
+ "# Foundation Model Ensemble (GIFT-Eval)\n",
+ "\n",
+ "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
+ "\n",
+ "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Model Description\n",
+ "\n",
+ "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
+ "\n",
+ "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
+ "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
+ "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
+ "\n",
+ "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
+ "\n",
+ "## TimeCopilot's Key Features\n",
+ "\n",
+ "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
+ "- **Ensemble capabilities**: Built-in ensemble methods\n",
+ "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
+ "- **Dependency management**: Handles complex model requirements automatically\n",
+ "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2SumVjnLj_C"
+ },
+ "source": [
+ "## Requirements and Installation\n",
+ "\n",
+ "Install the TimeCopilot library:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "yQpa1NOvLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"timecopilot>=0.0.22\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tVsga7ogLj_D"
+ },
+ "source": [
+ "## Dataset Setup\n",
+ "\n",
+ "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "mriqHxfOLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "\n",
+ "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
+ "# Handles the complete benchmark dataset with all 97 configurations\n",
+ "storage_path = \"./data/gift-eval\"\n",
+ "GIFTEval.download_data(storage_path=storage_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-SHX7gAtLj_D"
+ },
+ "source": [
+ "## Model Implementation\n",
+ "\n",
+ "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "iWYKncn03jVy"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.ensembles.median import MedianEnsemble\n",
+ "from timecopilot.models.foundation.chronos import Chronos\n",
+ "from timecopilot.models.foundation.timesfm import TimesFM\n",
+ "from timecopilot.models.foundation.tirex import TiRex\n",
+ "from timecopilot.models.utils.forecaster import Forecaster\n",
+ "\n",
+ "batch_size = 64\n",
+ "\n",
+ "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
+ "# Automatically handles dependency conflicts and GPU memory management\n",
+ "ensemble = MedianEnsemble(\n",
+ " models=[\n",
+ " # Each model uses TimeCopilot's unified interface despite different architectures\n",
+ " Chronos(\n",
+ " repo_id=\"amazon/chronos-2\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TimesFM(\n",
+ " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TiRex(\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " ],\n",
+ " alias=\"TimeCopilot\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sCjZScu5Lj_E"
+ },
+ "source": [
+ "## Evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yPKpn4e04KZD"
+ },
+ "source": [
+ "### Defining the evaluator"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2YcjoDF5NH7"
+ },
+ "source": [
+ "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "RMvE9Cx9Lj_D"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
+ "\n",
+ "\n",
+ "def evaluate_forecaster(\n",
+ " forecaster: Forecaster,\n",
+ " dataset_name: str,\n",
+ " term: str,\n",
+ " output_path: str,\n",
+ " storage_path: str,\n",
+ "):\n",
+ " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
+ "\n",
+ " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
+ " gifteval = GIFTEval(\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=output_path,\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # GluonTS wrapper for GIFT-Eval compatibility\n",
+ " # It can receive any Forecaster from TimeCopilot\n",
+ " predictor = GluonTSPredictor(\n",
+ " forecaster=forecaster,\n",
+ " max_length=4_096,\n",
+ " batch_size=1_024,\n",
+ " )\n",
+ "\n",
+ " # Run evaluation with GIFT-Eval's standardized metrics\n",
+ " gifteval.evaluate_predictor(predictor, batch_size=512)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ajc2VPQl5cPY"
+ },
+ "source": [
+ "### Performing evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74XuerNA5rWU"
+ },
+ "source": [
+ "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "R41M3rDeLj_E"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
+ " combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ " ]\n",
+ "\n",
+ " for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=ensemble,\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=\"./results/timecopilot\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # Load consolidated results in GIFT-Eval format\n",
+ " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "cQ7WOUKCR_4h",
+ "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7I9OQThW6OD8",
- "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df"
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "97"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " TimeCopilot | \n",
+ " 220437.833920 | \n",
+ " 220437.833920 | \n",
+ " 239.903438 | \n",
+ " 1.916661 | \n",
+ " 0.058617 | \n",
+ " 0.058292 | \n",
+ " 14.666592 | \n",
+ " 469.508077 | \n",
+ " 0.085537 | \n",
+ " 0.043707 | \n",
+ " 0.034997 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " TimeCopilot | \n",
+ " 54.628522 | \n",
+ " 54.628522 | \n",
+ " 4.459038 | \n",
+ " 0.444425 | \n",
+ " 0.385657 | \n",
+ " 0.580057 | \n",
+ " 2.696251 | \n",
+ " 7.391111 | \n",
+ " 0.398400 | \n",
+ " 0.240354 | \n",
+ " 0.186401 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " TimeCopilot | \n",
+ " 71.800877 | \n",
+ " 71.800877 | \n",
+ " 4.851640 | \n",
+ " 0.488632 | \n",
+ " 0.470714 | \n",
+ " 0.757992 | \n",
+ " 3.374162 | \n",
+ " 8.473540 | \n",
+ " 0.513086 | \n",
+ " 0.293774 | \n",
+ " 0.232035 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " TimeCopilot | \n",
+ " 83.786483 | \n",
+ " 83.786483 | \n",
+ " 5.340595 | \n",
+ " 0.566997 | \n",
+ " 0.619369 | \n",
+ " 0.782812 | \n",
+ " 4.585122 | \n",
+ " 9.153496 | \n",
+ " 0.559122 | \n",
+ " 0.326219 | \n",
+ " 0.261100 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "len(DATASETS_WITH_TERMS)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BXupvNSFVWhG"
- },
- "source": [
- "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xK8KTPic6UzR"
- },
- "source": [
- "## Reproducibility statement"
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
+ "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
+ "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
+ "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 220437.833920 239.903438 1.916661 \n",
+ "1 54.628522 4.459038 0.444425 \n",
+ "2 71.800877 4.851640 0.488632 \n",
+ "3 83.786483 5.340595 0.566997 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.058617 0.058292 14.666592 \n",
+ "1 0.385657 0.580057 2.696251 \n",
+ "2 0.470714 0.757992 3.374162 \n",
+ "3 0.619369 0.782812 4.585122 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 469.508077 0.085537 0.043707 \n",
+ "1 7.391111 0.398400 0.240354 \n",
+ "2 8.473540 0.513086 0.293774 \n",
+ "3 9.153496 0.559122 0.326219 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.034997 Econ/Fin 1 \n",
+ "1 0.186401 Web/CloudOps 7 \n",
+ "2 0.232035 Web/CloudOps 7 \n",
+ "3 0.261100 Web/CloudOps 7 "
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " from IPython.display import display\n",
+ "\n",
+ " display(eval_df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7JCiHenv6Dma"
+ },
+ "source": [
+ "You can access the complete list of dataset and term combinations with the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "RmmMtHpA6HIu"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "2WBJ-wjv6Kz6",
+ "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "g0-oVisu6XX3"
- },
- "source": [
- "The TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed considering reproducibility as one of its main features. The library can replicate the official results provided by the mantainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets are tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
+ "data": {
+ "text/plain": [
+ "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "DATASETS_WITH_TERMS[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "7I9OQThW6OD8",
+ "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0kJwJ8mX6TH2"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.stats import SeasonalNaive\n",
- "\n",
- "combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- "]\n",
- "\n",
- "for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=f\"./results/seasonal_naive\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
+ "data": {
+ "text/plain": [
+ "97"
]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(DATASETS_WITH_TERMS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BXupvNSFVWhG"
+ },
+ "source": [
+ "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xK8KTPic6UzR"
+ },
+ "source": [
+ "## Reproducibility statement"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "g0-oVisu6XX3"
+ },
+ "source": [
+ "TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed with reproducibility as one of its main features. The library can replicate the official results provided by the maintainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets is tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0kJwJ8mX6TH2"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.stats import SeasonalNaive\n",
+ "\n",
+ "combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ "]\n",
+ "\n",
+ "for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=\"./results/seasonal_naive\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "0S-Oog_2UTCI",
+ "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "0S-Oog_2UTCI",
- "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df_sn"
},
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df_sn"
- },
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "eval_df_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "id": "D1T6ar_H8Zo8"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = pd.read_csv(\n",
- " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "id": "NETa8_6Y8ip-"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()"
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eval_df_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "id": "D1T6ar_H8Zo8"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = pd.read_csv(\n",
+ " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "NETa8_6Y8ip-"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = (\n",
+ " official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "hkH2NLKMUVii",
+ "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "hkH2NLKMUVii",
- "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "official_eval_sn"
},
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "official_eval_sn"
- },
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "official_eval_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "id": "OCifh_5D9B05"
- },
- "outputs": [],
- "source": [
- "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0wapKMgTFScM"
- },
- "source": [
- "## Changelog\n",
- "\n",
- "### **2025-11-06**\n",
- "\n",
- "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
- "\n",
- "### **2025-08-05**\n",
- "\n",
- "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
}
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": [],
- "runtime_attributes": {
- "runtime_version": "2025.07"
- }
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.12"
- }
+ ],
+ "source": [
+ "official_eval_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "id": "OCifh_5D9B05"
+ },
+ "outputs": [],
+ "source": [
+ "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0wapKMgTFScM"
+ },
+ "source": [
+ "## Changelog\n",
+ "\n",
+ "### **2025-11-06**\n",
+ "\n",
+ "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
+ "\n",
+ "### **2025-08-05**\n",
+ "\n",
+ "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "runtime_attributes": {
+ "runtime_version": "2025.07"
+ }
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
},
- "nbformat": 4,
- "nbformat_minor": 0
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/docs/examples/llm-providers.ipynb b/docs/examples/llm-providers.ipynb
index bab596fa..f2cb1d25 100644
--- a/docs/examples/llm-providers.ipynb
+++ b/docs/examples/llm-providers.ipynb
@@ -35,6 +35,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()"
]
},
@@ -46,7 +47,8 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "from timecopilot import TimeCopilot\n"
+ "\n",
+ "from timecopilot import TimeCopilot"
]
},
{
@@ -276,10 +278,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(\n",
- " llm=\"openai:gpt-4o\",\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=\"openai:gpt-4o\", retries=3)"
]
},
{
@@ -365,10 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(\n",
- " llm='ollama:gpt-oss:20b',\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=\"ollama:gpt-oss:20b\", retries=3)"
]
},
{
@@ -396,10 +392,7 @@
" provider=OllamaProvider(base_url=\"http://localhost:11434/v1\"),\n",
")\n",
"\n",
- "tc = TimeCopilot(\n",
- " llm=llm,\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=llm, retries=3)"
]
},
{
diff --git a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
index e0ad916f..f61767cd 100644
--- a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
+++ b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -202,14 +202,13 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" AutoARIMA(),\n",
" Chronos(repo_id=\"amazon/chronos-bolt-small\"),\n",
- " Moirai(), \n",
- " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"), \n",
- " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"), \n",
+ " Moirai(),\n",
+ " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"),\n",
+ " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -236,8 +235,14 @@
"metadata": {},
"outputs": [],
"source": [
- "level = [0, 20, 40, 60, 80] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
+ "level = [\n",
+ " 0,\n",
+ " 20,\n",
+ " 40,\n",
+ " 60,\n",
+ " 80,\n",
+ "] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
]
},
{
@@ -605,12 +610,14 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]), \n",
- " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
+ " cv_df.drop(columns=[\"cutoff\"]),\n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
+ " 3\n",
+ ")"
]
}
],
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 847a1bed..40a91384 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -4,14 +4,14 @@
from functools import partial
from pathlib import Path
from typing import Any
-import numpy as np
+import numpy as np
import pandas as pd
from pydantic import BaseModel, Field
from pydantic_ai import Agent
from pydantic_ai.agent import AgentRunResult
from utilsforecast.evaluation import evaluate
-from utilsforecast.losses import _zero_to_nan, mae
+from utilsforecast.losses import mae
from ..models.utils.forecaster import (
get_seasonality,
@@ -24,11 +24,13 @@
category=FutureWarning,
)
+
def _zero_to_nan_pd(s: pd.Series) -> pd.Series:
s = s.astype(float).copy()
s[s == 0] = np.nan
return s
+
def mase(
df: pd.DataFrame,
models: list[str],
From 2747f0cb6085a2f8b287e60c16adf955fd51beca Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 11:15:39 +0530
Subject: [PATCH 05/21] Fix live test flakiness in queryable checks
Update live tests to query using the actual unique_id from generated data instead of a hardcoded value. This avoids empty series selection and prevents flaky failures when checking queryability.
---
tests/test_live.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/tests/test_live.py b/tests/test_live.py
index 9d870393..93602f59 100644
--- a/tests/test_live.py
+++ b/tests/test_live.py
@@ -88,12 +88,13 @@ def test_is_queryable():
retries=3,
)
assert not tc.is_queryable()
- result = tc.forecast(
+ tc.forecast(
df=df,
query=f"Please forecast the series with a horizon of {h} and frequency D.",
)
assert tc.is_queryable()
- result = tc.query("how much will change the series with id 0?")
+ series_id = df["unique_id"].iloc[0]
+ result = tc.query(f"how much will change the series with id {series_id}?")
print(result.output)
@@ -148,7 +149,8 @@ async def test_async_is_queryable():
query=f"Please forecast the series with a horizon of {h} and frequency D.",
)
assert tc.is_queryable()
- answer = await tc.query("how much will change the series with id 0?")
+ series_id = df["unique_id"].iloc[0]
+ answer = await tc.query(f"how much will change the series with id {series_id}?")
print(answer.output)
From 7869b10809b6eba638c7084f70e73c3d93507cde Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:32:35 +0530
Subject: [PATCH 06/21] Update timecopilot/utils/experiment_handler.py
as suggested by copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
timecopilot/utils/experiment_handler.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 40a91384..eed90592 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -46,7 +46,6 @@ def mase(
models,
id_col=id_col,
target_col=target_col,
- # cutoff_col=cutoff_col,
).set_index(id_col)
cutoff = None
From 2b9c6975ef5314e5c472a088f2b97c16d905b19a Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:33:05 +0530
Subject: [PATCH 07/21] Update timecopilot/utils/experiment_handler.py
as suggested by copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
timecopilot/utils/experiment_handler.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index eed90592..12952203 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -272,8 +272,6 @@ def add_id_cutoff(df: pd.DataFrame):
if "cutoff" not in eval_df.columns:
if "id_cutoff" in eval_df.columns:
eval_df = eval_df.merge(cutoffs, on="id_cutoff", how="left")
- else:
- pass
cols = ["unique_id", "cutoff", "metric"] + models
cols = [c for c in cols if c in eval_df.columns]
From b79bc2e1756b1782e7d77f4e4b5b3b648154abb1 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:52:22 +0530
Subject: [PATCH 08/21] Fix ruff lint issues
---
docs/examples/aws-bedrock.ipynb | 9 ++++-----
timecopilot/utils/experiment_handler.py | 7 +++----
2 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index fcceedc8..03b4197d 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -31,18 +31,17 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "30e306f7",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
- "nest_asyncio.apply()\n",
- "\n",
"import pandas as pd\n",
"\n",
- "from timecopilot import TimeCopilot"
+ "from timecopilot import TimeCopilot\n",
+ "\n",
+ "nest_asyncio.apply()\n"
]
},
{
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 12952203..a53e3c6c 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -147,7 +147,7 @@ def read_df(path: str | Path) -> pd.DataFrame:
if suffix in {"csv", "txt"}:
df = read_fn(io.StringIO(resp.text)) # type: ignore[arg-type]
elif suffix in {"parquet"}:
- import pyarrow as pa # noqa: WPS433
+ import pyarrow as pa
table = pa.ipc.open_file(pa.BufferReader(resp.content)).read_all()
df = table.to_pandas()
@@ -269,9 +269,8 @@ def add_id_cutoff(df: pd.DataFrame):
models=models,
id_col="id_cutoff",
)
- if "cutoff" not in eval_df.columns:
- if "id_cutoff" in eval_df.columns:
- eval_df = eval_df.merge(cutoffs, on="id_cutoff", how="left")
+ if "cutoff" not in eval_df.columns and "id_cutoff" in eval_df.columns:
+ eval_df = eval_df.merge(cutoffs, on="id_cutoff", how="left")
cols = ["unique_id", "cutoff", "metric"] + models
cols = [c for c in cols if c in eval_df.columns]
From 387e0f54a5011d37583b140ce8cea58a1d73d5ad Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:53:48 +0530
Subject: [PATCH 09/21] lint fix
---
docs/examples/aws-bedrock.ipynb | 2 +-
timecopilot/utils/experiment_handler.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index 03b4197d..862bb683 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -41,7 +41,7 @@
"\n",
"from timecopilot import TimeCopilot\n",
"\n",
- "nest_asyncio.apply()\n"
+ "nest_asyncio.apply()"
]
},
{
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index a53e3c6c..9bea60b6 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -147,7 +147,7 @@ def read_df(path: str | Path) -> pd.DataFrame:
if suffix in {"csv", "txt"}:
df = read_fn(io.StringIO(resp.text)) # type: ignore[arg-type]
elif suffix in {"parquet"}:
- import pyarrow as pa
+ import pyarrow as pa
table = pa.ipc.open_file(pa.BufferReader(resp.content)).read_all()
df = table.to_pandas()
From 91c2f6ba3c89c7b73490626c331c4f803f262c5b Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Tue, 13 Jan 2026 11:00:11 +0530
Subject: [PATCH 10/21] Fix test_is_queryable failure
Remove series-specific query that caused invalid dataframe
Keep test focused on queryable state only
---
tests/test_live.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tests/test_live.py b/tests/test_live.py
index 93602f59..01d2201f 100644
--- a/tests/test_live.py
+++ b/tests/test_live.py
@@ -93,8 +93,7 @@ def test_is_queryable():
query=f"Please forecast the series with a horizon of {h} and frequency D.",
)
assert tc.is_queryable()
- series_id = df["unique_id"].iloc[0]
- result = tc.query(f"how much will change the series with id {series_id}?")
+ result = tc.query("how much will the series change?")
print(result.output)
From d0829f72758c1215608bbf71db5b3fc0878b0933 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Tue, 13 Jan 2026 11:15:01 +0530
Subject: [PATCH 11/21] Fix live test failure
Disable anomaly detection in test_is_queryable
---
tests/test_live.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/test_live.py b/tests/test_live.py
index 01d2201f..f492d061 100644
--- a/tests/test_live.py
+++ b/tests/test_live.py
@@ -91,6 +91,7 @@ def test_is_queryable():
tc.forecast(
df=df,
query=f"Please forecast the series with a horizon of {h} and frequency D.",
+ detect_anomalies=False,
)
assert tc.is_queryable()
result = tc.query("how much will the series change?")
From 1bad91cf9bcb17f221b35d52ee92c5c47ce70401 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Tue, 13 Jan 2026 11:21:13 +0530
Subject: [PATCH 12/21] Revert "Fix live test failure"
This reverts commit d0829f72758c1215608bbf71db5b3fc0878b0933.
---
tests/test_live.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/test_live.py b/tests/test_live.py
index f492d061..01d2201f 100644
--- a/tests/test_live.py
+++ b/tests/test_live.py
@@ -91,7 +91,6 @@ def test_is_queryable():
tc.forecast(
df=df,
query=f"Please forecast the series with a horizon of {h} and frequency D.",
- detect_anomalies=False,
)
assert tc.is_queryable()
result = tc.query("how much will the series change?")
From e603847532da2195a345fa10be206cc275d423cb Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Mon, 19 Jan 2026 10:04:13 +0530
Subject: [PATCH 13/21] Fix MASE scaling issues and clean up unused arguments
Replaced the imported _zero_to_nan with a pandas-based implementation to avoid division-by-zero and index alignment issues.
Fixed the unnecessary .copy() usage.
Removed the unused time_col: str = "ds" argument.
Reverted earlier notebook changes that were unrelated and only attempted to address lint and styling errors.
Also applied the final lint fix.
---
docs/examples/agent-quickstart.ipynb | 3 +-
...maly-detection-forecaster-quickstart.ipynb | 3 +-
docs/examples/aws-bedrock.ipynb | 43 +-
docs/examples/chronos-family.ipynb | 12 +-
docs/examples/cryptocurrency-quickstart.ipynb | 86 ++--
docs/examples/forecaster-quickstart.ipynb | 11 +-
docs/examples/gift-eval.ipynb | 24 +-
docs/examples/google-llms.ipynb | 444 ++++++++++++++++++
docs/examples/llm-providers.ipynb | 17 +-
docs/examples/sktime.ipynb | 418 +++++++++++++++++
...ndation-models-comparison-quickstart.ipynb | 27 +-
tests/test_agent.py | 4 +-
timecopilot/forecaster.py | 8 +-
timecopilot/utils/experiment_handler.py | 3 +-
14 files changed, 971 insertions(+), 132 deletions(-)
create mode 100644 docs/examples/google-llms.ipynb
create mode 100644 docs/examples/sktime.ipynb
diff --git a/docs/examples/agent-quickstart.ipynb b/docs/examples/agent-quickstart.ipynb
index 92f97fbc..f6281ede 100644
--- a/docs/examples/agent-quickstart.ipynb
+++ b/docs/examples/agent-quickstart.ipynb
@@ -159,10 +159,11 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"tc = TimeCopilot(\n",
" llm=\"openai:gpt-4o\",\n",
" retries=3,\n",
- ")"
+ ")\n"
]
},
{
diff --git a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
index a5443bdd..45bff4fb 100644
--- a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
+++ b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
@@ -184,7 +184,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/the_anomaly_tour.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df"
+ "df\n"
]
},
{
@@ -223,6 +223,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index 862bb683..666bd52e 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -31,17 +31,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "30e306f7",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
- "import pandas as pd\n",
"\n",
- "from timecopilot import TimeCopilot\n",
+ "nest_asyncio.apply()\n",
+ "\n",
+ "import pandas as pd\n",
"\n",
- "nest_asyncio.apply()"
+ "from timecopilot import TimeCopilot"
]
},
{
@@ -88,7 +89,7 @@
"id": "9c4c7220",
"metadata": {},
"source": [
- "If you store your environment variablesles in a `.env` file, you can use the following load them into your environment from the file:"
+ "If you store your environment variables in a `.env` file, you can use the following to load them into your environment from the file:"
]
},
{
@@ -164,7 +165,7 @@
"outputs": [],
"source": [
"tc = TimeCopilot(\n",
- " llm=\"bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0\",\n",
+ " llm='bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',\n",
")"
]
},
@@ -187,7 +188,9 @@
"source": [
"from pydantic_ai.models.bedrock import BedrockConverseModel\n",
"\n",
- "model = BedrockConverseModel(\"us.anthropic.claude-3-5-sonnet-20241022-v2:0\")\n",
+ "model = BedrockConverseModel(\n",
+ " 'us.anthropic.claude-3-5-sonnet-20241022-v2:0'\n",
+ ")\n",
"tc = TimeCopilot(\n",
" llm=model,\n",
")"
@@ -217,7 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")"
+ "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")\n"
]
},
{
@@ -319,30 +322,6 @@
"print(result.output.tsfeatures_analysis)"
]
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "1fe2ed8a",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The time series analysis reveals strong seasonal and trend components:\n",
- "1. High seasonal strength (0.98) indicates pronounced yearly patterns\n",
- "2. Strong positive autocorrelation (x_acf1: 0.95) suggests strong trend\n",
- "3. Significant Holt-Winters seasonality (hw_gamma: 0.75) confirms seasonal importance\n",
- "4. High stability (0.93) indicates consistent patterns\n",
- "5. Non-stationary series (KPSS: 2.74) confirms strong trend\n",
- "6. Clear seasonal peaks (July) and troughs (November)\n"
- ]
- }
- ],
- "source": [
- "print(result.output.tsfeatures_analysis)"
- ]
- },
{
"cell_type": "code",
"execution_count": 9,
diff --git a/docs/examples/chronos-family.ipynb b/docs/examples/chronos-family.ipynb
index 84174108..f9bb2021 100644
--- a/docs/examples/chronos-family.ipynb
+++ b/docs/examples/chronos-family.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()"
+ "df.head()\n"
]
},
{
@@ -241,7 +241,7 @@
"outputs": [],
"source": [
"level = [20, 40, 60, 80]\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
]
},
{
@@ -638,14 +638,12 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]),\n",
- " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
+ " cv_df.drop(columns=[\"cutoff\"]), \n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
- " 3\n",
- ")"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
]
}
],
diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb
index e22475b2..846d74f5 100644
--- a/docs/examples/cryptocurrency-quickstart.ipynb
+++ b/docs/examples/cryptocurrency-quickstart.ipynb
@@ -16,6 +16,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"# Import all the timecopilot goodies\n",
"import os\n",
"\n",
@@ -60,7 +61,7 @@
"outputs": [],
"source": [
"files = os.listdir(path)\n",
- "files = [path + \"/\" + x for x in files]"
+ "files = [path+'/'+x for x in files]"
]
},
{
@@ -195,18 +196,18 @@
"# Read all filez and set them up to the readable structure for timecopilot\n",
"for file in files:\n",
" temp_df = pd.read_csv(file)\n",
- " temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n",
- " temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n",
- " big_df = pd.concat([big_df, temp_df])\n",
+ " temp_df = temp_df[['Symbol','Date','Close']]\n",
+ " temp_df.columns = ['unique_id','ds','y']\n",
+ " big_df = pd.concat([big_df,temp_df])\n",
"\n",
"big_df = big_df.reset_index(drop=True)\n",
"big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n",
"\n",
- "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n",
+ "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. \n",
"# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n",
- "cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n",
- "big_df = big_df[big_df.unique_id.isin(cryptos)]\n",
- "big_df = big_df.reset_index(drop=True)\n",
+ "cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n",
+ "big_df=big_df[big_df.unique_id.isin(cryptos)]\n",
+ "big_df=big_df.reset_index(drop=True)\n",
"big_df"
]
},
@@ -338,7 +339,6 @@
" df_out.loc[idx, col] = np.nan\n",
" return df_out\n",
"\n",
- "\n",
"df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n",
"df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n",
"print(df_missing)"
@@ -707,14 +707,12 @@
}
],
"source": [
- "anomaly_summary_xlm = anomalies_df[\n",
+ "anomaly_summary_xlm=anomalies_df[\n",
" # (anomalies_df.unique_id=='SOL') & \\\n",
- " (\n",
- " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
- " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
- " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
- " )\n",
- "].reset_index(drop=True)\n",
+ " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
+ " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
+ " (anomalies_df['Theta-anomaly']==True)\n",
+ " )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -954,14 +952,12 @@
}
],
"source": [
- "anomaly_summary_xlm = anomalies_df[\n",
- " (anomalies_df.unique_id == \"ADA\")\n",
- " & (\n",
- " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
- " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
- " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
- " )\n",
- "].reset_index(drop=True)\n",
+ "anomaly_summary_xlm=anomalies_df[\n",
+ " (anomalies_df.unique_id=='ADA') & \\\n",
+ " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
+ " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
+ " (anomalies_df['Theta-anomaly']==True)\n",
+ " )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1201,14 +1197,12 @@
}
],
"source": [
- "anomaly_summary_xlm = anomalies_df[\n",
- " (anomalies_df.unique_id == \"ADA\")\n",
- " & (\n",
- " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
- " & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
- " # (anomalies_df['Theta-anomaly']==True)\n",
- " )\n",
- "].reset_index(drop=True)\n",
+ "anomaly_summary_xlm=anomalies_df[\n",
+ " (anomalies_df.unique_id=='ADA') & \\\n",
+ " ((anomalies_df['Chronos-anomaly']==True) & \\\n",
+ " (anomalies_df['SeasonalNaive-anomaly']==True) \\\n",
+ " # (anomalies_df['Theta-anomaly']==True)\n",
+ " )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1245,12 +1239,12 @@
"source": [
"tcf1 = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(),\n",
+ " AutoARIMA(), \n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
" Theta(),\n",
- " AutoETS(),\n",
- " Moirai(),\n",
- " Prophet(),\n",
+ " AutoETS(), \n",
+ " Moirai(), \n",
+ " Prophet(), \n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -1263,7 +1257,7 @@
"metadata": {},
"outputs": [],
"source": [
- "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])"
+ "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])"
]
},
{
@@ -1307,9 +1301,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
- " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
- "].reset_index(drop=True)"
+ "eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
+ " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
+ " .reset_index(drop=True)"
]
},
{
@@ -1349,9 +1343,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
- " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
- "].reset_index(drop=True)"
+ "eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
+ " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
+ " .reset_index(drop=True)"
]
},
{
@@ -1519,9 +1513,9 @@
}
],
"source": [
- "compare = eth_fcst_normal.merge(eth_fcst_missing, on=[\"ds\", \"unique_id\"])\n",
- "compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n",
- "print(compare[\"dif\"].sum())"
+ "compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n",
+ "compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n",
+ "print(compare['dif'].sum())"
]
},
{
diff --git a/docs/examples/forecaster-quickstart.ipynb b/docs/examples/forecaster-quickstart.ipynb
index 31ed2858..9a0f93b0 100644
--- a/docs/examples/forecaster-quickstart.ipynb
+++ b/docs/examples/forecaster-quickstart.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()"
+ "df.head()\n"
]
},
{
@@ -171,12 +171,13 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(),\n",
- " AutoETS(),\n",
- " Moirai(),\n",
- " Prophet(),\n",
+ " AutoARIMA(), \n",
+ " AutoETS(), \n",
+ " Moirai(), \n",
+ " Prophet(), \n",
" SeasonalNaive(),\n",
" ]\n",
")"
diff --git a/docs/examples/gift-eval.ipynb b/docs/examples/gift-eval.ipynb
index 6273cb26..672c3e89 100644
--- a/docs/examples/gift-eval.ipynb
+++ b/docs/examples/gift-eval.ipynb
@@ -174,12 +174,12 @@
"\n",
"\n",
"def evaluate_forecaster(\n",
- " forecaster: Forecaster,\n",
- " dataset_name: str,\n",
- " term: str,\n",
- " output_path: str,\n",
- " storage_path: str,\n",
- "):\n",
+ " forecaster: Forecaster,\n",
+ " dataset_name: str,\n",
+ " term: str,\n",
+ " output_path: str,\n",
+ " storage_path: str,\n",
+ " ):\n",
" \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
"\n",
" # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
@@ -231,7 +231,7 @@
"source": [
"import torch\n",
"\n",
- "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
+ "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
" combinations = [\n",
" (\"m4_weekly\", \"short\"),\n",
" (\"bizitobs_l2c/H\", \"short\"),\n",
@@ -249,7 +249,7 @@
" )\n",
"\n",
" # Load consolidated results in GIFT-Eval format\n",
- " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")"
+ " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n"
]
},
{
@@ -688,9 +688,9 @@
],
"source": [
"if torch.cuda.is_available():\n",
- " from IPython.display import display\n",
+ " from IPython.display import display\n",
"\n",
- " display(eval_df)"
+ " display(eval_df)"
]
},
{
@@ -1280,9 +1280,7 @@
},
"outputs": [],
"source": [
- "official_eval_sn = (\n",
- " official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()\n",
- ")"
+ "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()"
]
},
{
diff --git a/docs/examples/google-llms.ipynb b/docs/examples/google-llms.ipynb
new file mode 100644
index 00000000..16f5291c
--- /dev/null
+++ b/docs/examples/google-llms.ipynb
@@ -0,0 +1,444 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "eeebf16a",
+ "metadata": {},
+ "source": [
+ "# Using TimeCopilot with Google Endpoints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "51eaf62c",
+ "metadata": {},
+ "source": [
+ "## Generative Language API\n",
+ "\n",
+ "### Requirements \n",
+ "\n",
+ "1. A Google Account\n",
+ "2. An API key created through [AI Studio](https://aistudio.google.com/api-keys)\n",
+ " - Even if you are on the free tier, you may still need to setup billing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a8fa1ca",
+ "metadata": {},
+ "source": [
+ "### Dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "26a30c5f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import nest_asyncio\n",
+ "\n",
+ "nest_asyncio.apply()\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "from timecopilot import TimeCopilot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7aadea62",
+ "metadata": {},
+ "source": [
+ "### Environment Variables\n",
+ "\n",
+ "Api keys and other configuration elements may need to be loaded into the environment so pydantic can read them during setup, at least when specifying the llm with a string. \n",
+ "\n",
+ "The api key should be loaded into the `GOOGLE_API_KEY` environment variable."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da478e69",
+ "metadata": {},
+ "source": [
+ "This is how you would load the environment variables in a Unix-like system:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6847270a",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "export GOOGLE_API_KEY='your-secret-key'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1ee9bf81",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "source": [
+ "If you store your environment variables in a `.env` file, you can use the following to load them into your environment from the file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e8a3e521",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "export $(grep -v '^#' .env | xargs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41aada4e",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "source": [
+ "Or you could load it in python with [dotenv](https://pypi.org/project/python-dotenv/):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "57575c94",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "load_dotenv()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e29470df",
+ "metadata": {},
+ "source": [
+ "### TimeCopilot Agent\n",
+ "\n",
+ "TimeCopilot uses Pydantic to work with llms, so you specify the LLM the same way you'd specify an Agent with Pydantic. Either with a string in the form `'google-gla:model-id'` or instantiating a model with `pydantic_ai.models.google.GoogleModel`. \n",
+ "\n",
+ "For more details on how Pydantic works with Google's Generative Language API, see the [Pydantic docs on Google](https://ai.pydantic.dev/models/google/)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "972dad6f",
+ "metadata": {},
+ "source": [
+ "#### String example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "33c9c144",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tc = TimeCopilot(\n",
+ " llm='google-gla:gemini-3-pro-preview',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "27a0ba0c",
+ "metadata": {},
+ "source": [
+ "#### GoogleProvider Example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "afcd5244",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pydantic_ai.models.google import GoogleModel\n",
+ "from pydantic_ai.providers.google import GoogleProvider\n",
+ "\n",
+ "provider = GoogleProvider(api_key='your-api-key')\n",
+ "google_model = GoogleModel('gemini-3-pro-preview', provider=provider)\n",
+ "tc = TimeCopilot(\n",
+ " llm=google_model,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "838e02f3",
+ "metadata": {},
+ "source": [
+ "### Use your TimeCopilot Agent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "081976d4",
+ "metadata": {},
+ "source": [
+ "#### Load your data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "bfc33907",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0990eb1f",
+ "metadata": {},
+ "source": [
+ "#### Generate forecasts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "0d6c7a5c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "1it [00:00, 101.63it/s]\n",
+ "1it [00:00, 20.05it/s]\n",
+ "1it [00:00, 8.42it/s]\n",
+ "0it [00:00, ?it/s]15:57:28 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:28 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "1it [00:03, 3.53s/it]\n",
+ "1it [00:00, 151.38it/s]\n",
+ "15:57:36 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:36 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "0it [00:00, ?it/s]15:57:44 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:44 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "1it [00:03, 3.04s/it]15:57:47 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:47 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "2it [00:06, 3.02s/it]15:57:50 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:50 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "3it [00:09, 3.05s/it]15:57:53 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:53 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "4it [00:12, 3.07s/it]15:57:56 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:56 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "5it [00:15, 3.09s/it]15:57:59 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:57:59 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "6it [00:18, 3.09s/it]15:58:02 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:58:02 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "7it [00:21, 3.14s/it]15:58:05 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:58:05 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "8it [00:24, 3.14s/it]15:58:08 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:58:08 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "9it [00:27, 3.10s/it]15:58:11 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:58:11 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "10it [00:30, 3.07s/it]15:58:14 - cmdstanpy - INFO - Chain [1] start processing\n",
+ "15:58:14 - cmdstanpy - INFO - Chain [1] done processing\n",
+ "11it [00:33, 3.08s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = tc.forecast(df=df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9073e10a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AgentRunResult(output=ForecastAgentOutput(tsfeatures_analysis='The time series analysis revealed a highly structured and predictable dataset with the following key characteristics:\\n\\n* **Seasonality (High):** `seasonal_strength` is 0.98 with a clear 12-month period (`seasonal_period`: 12). This is the dominant feature, necessitating a model that explicitly handles seasonality.\\n* **Trend (High):** `trend` strength is 0.99, indicating a consistent upward trajectory. The data is non-stationary (`unitroot_kpss`: 2.74).\\n* **Entropy (Low):** A value of 0.43 indicates the signal is very strong compared to the noise, suggesting high forecastability.\\n* **Structure:** The data exhibits `nonlinearity` (0.42) and strong autocorrelation (`acf_features`), consistent with the classic multiplicative seasonality (variance increases as the trend rises) seen in airline passenger data.', selected_model='Prophet', model_details=\"**Selected Model: Prophet**\\n\\n**Technical Overview:**\\nProphet is an additive regression model developed by Meta. It decomposes the time series into three main components:\\n1. **Trend:**Modeled here as a non-linear saturating growth or piecewise linear model.\\n2. **Seasonality:** Modeled using Fourier series to capture periodic changes (in this case, annual).\\n3. **Holidays/Events:** (Not explicitly used here, but part of the model capability).\\n\\n**Why it fits this data:**\\n* **Flexibility:** It adapts well to strong seasonal patterns that don't fit perfectly into rigid ARIMA/ETS structures.\\n* **Trend Changes:** It can automatically detect change points in the trend, which is useful for long series like AirPassengers where growth rates may vary over decades.\\n* **Robustness:** It is generally robust to missing data and shifts, providing a stable forecast for business metrics.\", model_comparison=\"We evaluated five models using Mean Absolute Scaled Error (MASE). Lower scores indicate better performance.\\n\\n1. 
**Prophet (MASE: 1.09):** The clear winner. Prophet's ability to model distinct seasonal components and piecewise trends allowed it to capture the complex seasonality and trend shifts of the dataset much better than the rigid statistical models.\\n2. **DynamicOptimizedTheta (MASE: 1.66):** Performed respectably, significantly beating the baseline. It handled the general trend and seasonality well but was less precise than Prophet.\\n3. **AutoETS (MASE: 2.38) & AutoARIMA (MASE: 2.40):** Surprisingly, these standard models performed only slightly better than the baseline. They likely struggled with the specific window of evaluation or the intensifying amplitude of the seasonality without specific multiplicative tuning in the default setup.\\n4. **SeasonalNaive (MASE: 2.49):** The baseline model. While it captures the basic seasonal pattern, it fails to account for the trend, leading to the highest error rate.\", is_better_than_seasonal_naive=True, reason_for_selection='Prophet was selected because it achieved the lowest MASE score (1.09) in cross-validation, significantly outperforming the next best model (DynamicOptimizedTheta at 1.66) and the Seasonal Naive baseline (2.49). Its ability to accurately model the compounding seasonal variance and strong trend made it the superior choice.', forecast_analysis=\"The forecast for the next 24 months projects a continuation of the established historical patterns:\\n\\n1. **Trend:** A clear positive trend persists. The passenger numbers are expected to grow from a baseline of ~460 in the first forecast month to peaks exceeding 600 in the second year.\\n2. **Seasonality:** The strong annual cycle remains dominant.\\n * **Peaks:** We expect major peaks in the summer months (July/August), reaching approximately 577 in the first forecasted year and 614 in the second.\\n * **Troughs:** The lowest activity is predicted for the winter months (November-February), dipping below 470.\\n3. 
**Reliability:** Given the model's strong performance (MASE ~1.09) and the data's high predictability (low entropy), this forecast is considered reliable, assuming no major external disruptions to air travel occur (e.g., economic recession).\", anomaly_analysis=\"The analysis detected 8 anomalies (approximately 6.1% of the data points) using a 95% confidence level. \\n\\n**Pattern of Anomalies:**\\nThe anomalies are clustered in specific months across several years:\\n* **Summer Peaks:** July 1955, July 1956, July/August 1959, July/August 1960.\\n* **Late 1958:** November/December 1958.\\n\\n**Interpretation:**\\nMost anomalies coincide with the peak travel months (July/August). This suggests that the seasonal amplitude (the difference between low and high months) was increasing at a rate slightly faster than the model predicted. In the AirPassengers dataset, seasonality is multiplicative (the peaks get higher as the total number of passengers grows). While Prophet handles seasonality well, extreme seasonal peaks in the final years exceeded the model's confidence intervals, flagging them as anomalies. These should likely be interpreted as 'stronger than expected' seasonal deviations rather than errors in data collection.\", user_query_response='I have analyzed the AirPassengers dataset as requested. \\n\\nThe data is highly seasonal and trend-driven. After testing multiple models, **Prophet** proved to be the most accurate, reducing the error rate by over 50% compared to a standard seasonal baseline. \\n\\nThe forecast predicts continued growth in air travel, with annual peaks in July/August exceeding 600 passengers in the second forecasted year. Be aware that the \"anomalies\" detected are primarily just exceptionally strong summer months in the most recent years, reflecting the booming growth rather than data errors.'))\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "bcbe0d10",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The time series analysis revealed a highly structured and predictable dataset with the following key characteristics:\n",
+ "\n",
+ "* **Seasonality (High):** `seasonal_strength` is 0.98 with a clear 12-month period (`seasonal_period`: 12). This is the dominant feature, necessitating a model that explicitly handles seasonality.\n",
+ "* **Trend (High):** `trend` strength is 0.99, indicating a consistent upward trajectory. The data is non-stationary (`unitroot_kpss`: 2.74).\n",
+ "* **Entropy (Low):** A value of 0.43 indicates the signal is very strong compared to the noise, suggesting high forecastability.\n",
+ "* **Structure:** The data exhibits `nonlinearity` (0.42) and strong autocorrelation (`acf_features`), consistent with the classic multiplicative seasonality (variance increases as the trend rises) seen in airline passenger data.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(result.output.tsfeatures_analysis)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b4c25d2c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " unique_id ds Prophet\n",
+ "0 AirPassengers 1961-01-01 466.560401\n",
+ "1 AirPassengers 1961-02-01 461.042082\n",
+ "2 AirPassengers 1961-03-01 493.413542\n",
+ "3 AirPassengers 1961-04-01 492.113653\n",
+ "4 AirPassengers 1961-05-01 496.445709\n",
+ "5 AirPassengers 1961-06-01 537.592041\n",
+ "6 AirPassengers 1961-07-01 577.166093\n",
+ "7 AirPassengers 1961-08-01 577.599117\n",
+ "8 AirPassengers 1961-09-01 529.038266\n",
+ "9 AirPassengers 1961-10-01 493.889181\n",
+ "10 AirPassengers 1961-11-01 460.030234\n",
+ "11 AirPassengers 1961-12-01 489.392785\n",
+ "12 AirPassengers 1962-01-01 502.415939\n",
+ "13 AirPassengers 1962-02-01 496.321423\n",
+ "14 AirPassengers 1962-03-01 531.969966\n",
+ "15 AirPassengers 1962-04-01 528.065107\n",
+ "16 AirPassengers 1962-05-01 534.174659\n",
+ "17 AirPassengers 1962-06-01 573.615281\n",
+ "18 AirPassengers 1962-07-01 614.245102\n",
+ "19 AirPassengers 1962-08-01 614.206790\n",
+ "20 AirPassengers 1962-09-01 566.306418\n",
+ "21 AirPassengers 1962-10-01 530.606803\n",
+ "22 AirPassengers 1962-11-01 497.766797\n",
+ "23 AirPassengers 1962-12-01 527.289739\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(result.fcst_df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "088463a0",
+ "metadata": {},
+ "source": [
+ "#### Make Queries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8d3851b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_result = tc.query(\"What will the total number of passengers be in the next year?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "0c74d9a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Based on the forecast from the **Prophet** model (which performed best with a MASE of **1.09**), the total number of passengers predicted for the next year (1961) is approximately **6,074**.\n",
+ "\n",
+ "This figure is derived by summing the forecasted values for the first 12 months:\n",
+ "\n",
+ "* **January:** 467\n",
+ "* **February:** 461\n",
+ "* **March:** 493\n",
+ "* **April:** 492\n",
+ "* **May:** 496\n",
+ "* **June:** 538\n",
+ "* **July:** 577\n",
+ "* **August:** 578\n",
+ "* **September:** 529\n",
+ "* **October:** 494\n",
+ "* **November:** 460\n",
+ "* **December:** 489\n",
+ "\n",
+ "This forecast reflects the **strong upward trend** (strength: 0.99) and **seasonality** (strength: 0.98) identified in the historical data.\n",
+ "\n",
+ "Would you like to see a plot of this forecast to visualize the monthly variations?\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(query_result.output)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "timecopilot",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/llm-providers.ipynb b/docs/examples/llm-providers.ipynb
index f2cb1d25..d3f9afce 100644
--- a/docs/examples/llm-providers.ipynb
+++ b/docs/examples/llm-providers.ipynb
@@ -48,7 +48,7 @@
"source": [
"import pandas as pd\n",
"\n",
- "from timecopilot import TimeCopilot"
+ "from timecopilot import TimeCopilot\n"
]
},
{
@@ -278,7 +278,10 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(llm=\"openai:gpt-4o\", retries=3)"
+ "tc = TimeCopilot(\n",
+ " llm=\"openai:gpt-4o\",\n",
+ " retries=3\n",
+ ")"
]
},
{
@@ -364,7 +367,10 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(llm=\"ollama:gpt-oss:20b\", retries=3)"
+ "tc = TimeCopilot(\n",
+ " llm='ollama:gpt-oss:20b',\n",
+ " retries=3\n",
+ ")"
]
},
{
@@ -392,7 +398,10 @@
" provider=OllamaProvider(base_url=\"http://localhost:11434/v1\"),\n",
")\n",
"\n",
- "tc = TimeCopilot(llm=llm, retries=3)"
+ "tc = TimeCopilot(\n",
+ " llm=llm,\n",
+ " retries=3\n",
+ ")"
]
},
{
diff --git a/docs/examples/sktime.ipynb b/docs/examples/sktime.ipynb
new file mode 100644
index 00000000..dae92ba6
--- /dev/null
+++ b/docs/examples/sktime.ipynb
@@ -0,0 +1,418 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "e890f3b3",
+ "metadata": {},
+ "source": [
+ "# Using sktime models\n",
+ "\n",
+ "This example shows how to use sktime-based models with the `timecopilot` library."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6140af37",
+ "metadata": {},
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "f5c85b29",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import nest_asyncio\n",
+ "\n",
+ "nest_asyncio.apply()\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "import timecopilot"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "694b7854",
+ "metadata": {},
+ "source": [
+ "## Set up the sktime model and adapt it to TimeCopilot\n",
+ "\n",
+ "sktime models need to be adapted to work properly with TimeCopilot. This is done by creating your model with sktime and wrapping it in `SKTimeAdapter`. Some sktime models may require additional configuration to work properly with the data you intend to use them on. For example, when using sktime's `NaiveForecaster` on monthly data with yearly seasonality, you might want to initialize it with an `sp` (seasonal periodicity) argument of `12`, like this: `NaiveForecaster(sp=12)`.\n",
+ "\n",
+ "The `alias` argument should also be provided, especially if you plan on adding multiple sktime forecasters. If you add multiple sktime models without specifying aliases, TimeCopilot will not be able to properly call all of them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1870fe0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sktime.forecasting.trend import TrendForecaster\n",
+ "\n",
+ "from timecopilot.models.adapters.sktime import SKTimeAdapter\n",
+ "\n",
+ "trend_forecaster = TrendForecaster()\n",
+ "\n",
+ "adapted_model = SKTimeAdapter(\n",
+ " model=trend_forecaster,\n",
+ " alias=\"TrendForecaster\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "37419dd4",
+ "metadata": {},
+ "source": [
+ "## Create a TimeCopilot instance with your sktime model\n",
+ "\n",
+ "When using sktime models, you need to pass them explicitly through the `forecasters` argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "f793b038",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tc = timecopilot.TimeCopilot(\n",
+ " llm=\"openai:gpt-4o\",\n",
+ " forecasters=[\n",
+ " adapted_model,\n",
+ " ],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "401d5b6f",
+ "metadata": {},
+ "source": [
+ "### Extending default model list with an sktime adapted model\n",
+ "\n",
+ "If you want to use the default model list plus your sktime model, make a copy of the default list and append your model to it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "76505216",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_list = timecopilot.agent.DEFAULT_MODELS.copy()\n",
+ "model_list.append(adapted_model)\n",
+ "\n",
+ "tc = timecopilot.TimeCopilot(\n",
+ " llm=\"openai:gpt-4o\",\n",
+ " forecasters=model_list\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d6f09e4",
+ "metadata": {},
+ "source": [
+ "## Forecasting \n",
+ "Once that setup is complete, you can use TimeCopilot with your adapted sktime model the same way you'd normally use TimeCopilot."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "e9122229",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "f95e1578",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "1it [00:00, 4.70it/s]\n",
+ "1it [00:00, 223.32it/s]\n",
+ "11it [00:00, 77.11it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = tc.forecast(\n",
+ " df=df,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7355c143",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The 'AirPassengers' time series has a series length of 144 with a clear seasonal pattern identified using key features. The high 'seasonal_strength' of 0.981 suggests strong seasonality, evident from the 12-month seasonal period. The time series also exhibits trends, shown by a 'trend' score of 0.997, and moderate curvature at 1.069. The high autocorrelation 'x_acf1' at 0.948 indicates the persistence of patterns over time. The Holt-Winters parameters suggest a stable level (alpha ~1) with no trend component (beta ~0) and significant seasonal smoothing (gamma ~0.75). These features suggest that both trend and seasonality are prominent and need to be captured by the model.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(result.output.tsfeatures_analysis)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "86acfa60",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unique_id | \n",
+ " ds | \n",
+ " TrendForecaster | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AirPassengers | \n",
+ " 1961-01-01 | \n",
+ " 473.023018 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AirPassengers | \n",
+ " 1961-02-01 | \n",
+ " 475.729097 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AirPassengers | \n",
+ " 1961-03-01 | \n",
+ " 478.173296 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " AirPassengers | \n",
+ " 1961-04-01 | \n",
+ " 480.879374 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " AirPassengers | \n",
+ " 1961-05-01 | \n",
+ " 483.498159 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " AirPassengers | \n",
+ " 1961-06-01 | \n",
+ " 486.204237 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " AirPassengers | \n",
+ " 1961-07-01 | \n",
+ " 488.823023 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " AirPassengers | \n",
+ " 1961-08-01 | \n",
+ " 491.529101 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " AirPassengers | \n",
+ " 1961-09-01 | \n",
+ " 494.235179 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " AirPassengers | \n",
+ " 1961-10-01 | \n",
+ " 496.853964 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " AirPassengers | \n",
+ " 1961-11-01 | \n",
+ " 499.560042 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " AirPassengers | \n",
+ " 1961-12-01 | \n",
+ " 502.178827 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " AirPassengers | \n",
+ " 1962-01-01 | \n",
+ " 504.884906 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " AirPassengers | \n",
+ " 1962-02-01 | \n",
+ " 507.590984 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " AirPassengers | \n",
+ " 1962-03-01 | \n",
+ " 510.035183 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " AirPassengers | \n",
+ " 1962-04-01 | \n",
+ " 512.741261 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " AirPassengers | \n",
+ " 1962-05-01 | \n",
+ " 515.360046 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " AirPassengers | \n",
+ " 1962-06-01 | \n",
+ " 518.066125 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " AirPassengers | \n",
+ " 1962-07-01 | \n",
+ " 520.684910 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " AirPassengers | \n",
+ " 1962-08-01 | \n",
+ " 523.390988 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " AirPassengers | \n",
+ " 1962-09-01 | \n",
+ " 526.097066 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " AirPassengers | \n",
+ " 1962-10-01 | \n",
+ " 528.715851 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " AirPassengers | \n",
+ " 1962-11-01 | \n",
+ " 531.421929 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " AirPassengers | \n",
+ " 1962-12-01 | \n",
+ " 534.040714 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unique_id ds TrendForecaster\n",
+ "0 AirPassengers 1961-01-01 473.023018\n",
+ "1 AirPassengers 1961-02-01 475.729097\n",
+ "2 AirPassengers 1961-03-01 478.173296\n",
+ "3 AirPassengers 1961-04-01 480.879374\n",
+ "4 AirPassengers 1961-05-01 483.498159\n",
+ "5 AirPassengers 1961-06-01 486.204237\n",
+ "6 AirPassengers 1961-07-01 488.823023\n",
+ "7 AirPassengers 1961-08-01 491.529101\n",
+ "8 AirPassengers 1961-09-01 494.235179\n",
+ "9 AirPassengers 1961-10-01 496.853964\n",
+ "10 AirPassengers 1961-11-01 499.560042\n",
+ "11 AirPassengers 1961-12-01 502.178827\n",
+ "12 AirPassengers 1962-01-01 504.884906\n",
+ "13 AirPassengers 1962-02-01 507.590984\n",
+ "14 AirPassengers 1962-03-01 510.035183\n",
+ "15 AirPassengers 1962-04-01 512.741261\n",
+ "16 AirPassengers 1962-05-01 515.360046\n",
+ "17 AirPassengers 1962-06-01 518.066125\n",
+ "18 AirPassengers 1962-07-01 520.684910\n",
+ "19 AirPassengers 1962-08-01 523.390988\n",
+ "20 AirPassengers 1962-09-01 526.097066\n",
+ "21 AirPassengers 1962-10-01 528.715851\n",
+ "22 AirPassengers 1962-11-01 531.421929\n",
+ "23 AirPassengers 1962-12-01 534.040714"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result.fcst_df"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "timecopilot",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
index f61767cd..e0ad916f 100644
--- a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
+++ b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()"
+ "df.head()\n"
]
},
{
@@ -202,13 +202,14 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" AutoARIMA(),\n",
" Chronos(repo_id=\"amazon/chronos-bolt-small\"),\n",
- " Moirai(),\n",
- " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"),\n",
- " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"),\n",
+ " Moirai(), \n",
+ " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"), \n",
+ " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"), \n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -235,14 +236,8 @@
"metadata": {},
"outputs": [],
"source": [
- "level = [\n",
- " 0,\n",
- " 20,\n",
- " 40,\n",
- " 60,\n",
- " 80,\n",
- "] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
+ "level = [0, 20, 40, 60, 80] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
]
},
{
@@ -610,14 +605,12 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]),\n",
- " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
+ " cv_df.drop(columns=[\"cutoff\"]), \n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
- " 3\n",
- ")"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
]
}
],
diff --git a/tests/test_agent.py b/tests/test_agent.py
index ab057cb7..1fb457e2 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -9,7 +9,9 @@
def build_stub_llm(output: dict) -> FunctionModel: # noqa: D401
- def _response_fn(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: # noqa: D401
+ def _response_fn(
+ messages: list[ModelMessage], info: AgentInfo
+ ) -> ModelResponse: # noqa: D401
payload = json.dumps(output)
return ModelResponse(
parts=[ToolCallPart(tool_name="final_result", args=payload)]
diff --git a/timecopilot/forecaster.py b/timecopilot/forecaster.py
index 4c418845..a27ac5f4 100644
--- a/timecopilot/forecaster.py
+++ b/timecopilot/forecaster.py
@@ -96,9 +96,11 @@ def _call_models(
res_df_model = fn(**known_kwargs, **kwargs)
res_df_model = res_df_model.rename(
columns={
- col: col.replace(self.fallback_model.alias, model.alias)
- if col.startswith(self.fallback_model.alias)
- else col
+ col: (
+ col.replace(self.fallback_model.alias, model.alias)
+ if col.startswith(self.fallback_model.alias)
+ else col
+ )
for col in res_df_model.columns
}
)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 9bea60b6..cb8b2a3d 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -26,7 +26,7 @@
def _zero_to_nan_pd(s: pd.Series) -> pd.Series:
- s = s.astype(float).copy()
+ s = s.astype(float)
s[s == 0] = np.nan
return s
@@ -37,7 +37,6 @@ def mase(
seasonality: int,
train_df: pd.DataFrame,
id_col: str = "unique_id",
- time_col: str = "ds",
target_col: str = "y",
cutoff_col: str = "cutoff",
) -> pd.DataFrame:
From 9f89defa8cbe6337960988b35767713734b63950 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Mon, 19 Jan 2026 10:11:12 +0530
Subject: [PATCH 14/21] fixing conflicts
---
docs/examples/agent-quickstart.ipynb | 2 -
...maly-detection-forecaster-quickstart.ipynb | 5 +-
docs/examples/aws-bedrock.ipynb | 5 +-
docs/examples/cryptocurrency-quickstart.ipynb | 20 +-
docs/examples/forecaster-quickstart.ipynb | 5 +-
docs/examples/gift-eval.ipynb | 3514 ++++++++---------
docs/examples/google-llms.ipynb | 6 +-
docs/examples/llm-providers.ipynb | 2 -
docs/examples/sktime.ipynb | 8 +-
9 files changed, 1780 insertions(+), 1787 deletions(-)
diff --git a/docs/examples/agent-quickstart.ipynb b/docs/examples/agent-quickstart.ipynb
index f6281ede..92114456 100644
--- a/docs/examples/agent-quickstart.ipynb
+++ b/docs/examples/agent-quickstart.ipynb
@@ -16,7 +16,6 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
"nest_asyncio.apply()"
]
},
@@ -36,7 +35,6 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "\n",
"from timecopilot import TimeCopilot"
]
},
diff --git a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
index 45bff4fb..18f6134a 100644
--- a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
+++ b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
@@ -35,7 +35,6 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "\n",
"from timecopilot import TimeCopilotForecaster"
]
},
@@ -202,9 +201,9 @@
"metadata": {},
"outputs": [],
"source": [
+ "from timecopilot.models.stats import SeasonalNaive, Theta\n",
"from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.flowstate import FlowState\n",
- "from timecopilot.models.stats import SeasonalNaive, Theta"
+ "from timecopilot.models.foundation.flowstate import FlowState"
]
},
{
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index 666bd52e..52f44523 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -37,12 +37,11 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
"nest_asyncio.apply()\n",
"\n",
- "import pandas as pd\n",
+ "from timecopilot import TimeCopilot\n",
"\n",
- "from timecopilot import TimeCopilot"
+ "import pandas as pd"
]
},
{
diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb
index 846d74f5..4d4a39a0 100644
--- a/docs/examples/cryptocurrency-quickstart.ipynb
+++ b/docs/examples/cryptocurrency-quickstart.ipynb
@@ -16,21 +16,23 @@
"metadata": {},
"outputs": [],
"source": [
+ "import sys\n",
"\n",
"# Import all the timecopilot goodies\n",
- "import os\n",
+ "import timecopilot\n",
+ "from timecopilot import TimeCopilotForecaster\n",
+ "from timecopilot.models.stats import SeasonalNaive, Theta\n",
+ "from timecopilot.models.foundation.chronos import Chronos\n",
"\n",
- "import kagglehub\n",
- "import numpy as np\n",
+ "from timecopilot.models.prophet import Prophet\n",
+ "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive\n",
+ "from timecopilot.models.foundation.moirai import Moirai\n",
"\n",
"# Import basic libraries\n",
"import pandas as pd\n",
- "\n",
- "from timecopilot import TimeCopilotForecaster\n",
- "from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.moirai import Moirai\n",
- "from timecopilot.models.prophet import Prophet\n",
- "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive, Theta"
+ "import kagglehub\n",
+ "import os\n",
+ "import numpy as np"
]
},
{
diff --git a/docs/examples/forecaster-quickstart.ipynb b/docs/examples/forecaster-quickstart.ipynb
index 9a0f93b0..6239177a 100644
--- a/docs/examples/forecaster-quickstart.ipynb
+++ b/docs/examples/forecaster-quickstart.ipynb
@@ -24,7 +24,6 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "\n",
"from timecopilot import TimeCopilotForecaster"
]
},
@@ -150,9 +149,9 @@
"metadata": {},
"outputs": [],
"source": [
- "from timecopilot.models.foundation.moirai import Moirai\n",
"from timecopilot.models.prophet import Prophet\n",
- "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive"
+ "from timecopilot.models.stats import AutoARIMA, AutoETS, SeasonalNaive\n",
+ "from timecopilot.models.foundation.moirai import Moirai"
]
},
{
diff --git a/docs/examples/gift-eval.ipynb b/docs/examples/gift-eval.ipynb
index 672c3e89..70347b4d 100644
--- a/docs/examples/gift-eval.ipynb
+++ b/docs/examples/gift-eval.ipynb
@@ -1,1782 +1,1782 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "azZ6BczQLj_B"
- },
- "source": [
- "# Foundation Model Ensemble (GIFT-Eval)\n",
- "\n",
- "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
- "\n",
- "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
- "\n",
- "\n",
- "\n",
- "## Model Description\n",
- "\n",
- "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
- "\n",
- "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
- "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
- "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
- "\n",
- "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
- "\n",
- "## TimeCopilot's Key Features\n",
- "\n",
- "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
- "- **Ensemble capabilities**: Built-in ensemble methods\n",
- "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
- "- **Dependency management**: Handles complex model requirements automatically\n",
- "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2SumVjnLj_C"
- },
- "source": [
- "## Requirements and Installation\n",
- "\n",
- "Install TimeCopilot library:\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "yQpa1NOvLj_D"
- },
- "outputs": [],
- "source": [
- "%pip install \"timecopilot>=0.0.22\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "tVsga7ogLj_D"
- },
- "source": [
- "## Dataset Setup\n",
- "\n",
- "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "mriqHxfOLj_D"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "\n",
- "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
- "# Handles the complete benchmark dataset with all 97 configurations\n",
- "storage_path = \"./data/gift-eval\"\n",
- "GIFTEval.download_data(storage_path=storage_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-SHX7gAtLj_D"
- },
- "source": [
- "## Model Implementation\n",
- "\n",
- "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "iWYKncn03jVy"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.ensembles.median import MedianEnsemble\n",
- "from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.timesfm import TimesFM\n",
- "from timecopilot.models.foundation.tirex import TiRex\n",
- "from timecopilot.models.utils.forecaster import Forecaster\n",
- "\n",
- "batch_size = 64\n",
- "\n",
- "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
- "# Automatically handles dependency conflicts and GPU memory management\n",
- "ensemble = MedianEnsemble(\n",
- " models=[\n",
- " # Each model uses TimeCopilot's unified interface despite different architectures\n",
- " Chronos(\n",
- " repo_id=\"amazon/chronos-2\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TimesFM(\n",
- " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TiRex(\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " ],\n",
- " alias=\"TimeCopilot\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sCjZScu5Lj_E"
- },
- "source": [
- "## Evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yPKpn4e04KZD"
- },
- "source": [
- "### Defining the evaluator"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2YcjoDF5NH7"
- },
- "source": [
- "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "RMvE9Cx9Lj_D"
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "\n",
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
- "\n",
- "\n",
- "def evaluate_forecaster(\n",
- " forecaster: Forecaster,\n",
- " dataset_name: str,\n",
- " term: str,\n",
- " output_path: str,\n",
- " storage_path: str,\n",
- " ):\n",
- " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
- "\n",
- " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
- " gifteval = GIFTEval(\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=output_path,\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # GluonTS wrapper for GIFT-Eval compatibility\n",
- " # It can receive any Forecaster from TimeCopilot\n",
- " predictor = GluonTSPredictor(\n",
- " forecaster=forecaster,\n",
- " max_length=4_096,\n",
- " batch_size=1_024,\n",
- " )\n",
- "\n",
- " # Run evaluation with GIFT-Eval's standardized metrics\n",
- " gifteval.evaluate_predictor(predictor, batch_size=512)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ajc2VPQl5cPY"
- },
- "source": [
- "### Performing evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "74XuerNA5rWU"
- },
- "source": [
- "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "R41M3rDeLj_E"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
- " combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- " ]\n",
- "\n",
- " for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=ensemble,\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=\"./results/timecopilot\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # Load consolidated results in GIFT-Eval format\n",
- " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "azZ6BczQLj_B"
+ },
+ "source": [
+ "# Foundation Model Ensemble (GIFT-Eval)\n",
+ "\n",
+ "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
+ "\n",
+ "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Model Description\n",
+ "\n",
+ "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
+ "\n",
+ "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
+ "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
+ "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
+ "\n",
+ "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
+ "\n",
+ "## TimeCopilot's Key Features\n",
+ "\n",
+ "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
+ "- **Ensemble capabilities**: Built-in ensemble methods\n",
+ "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
+ "- **Dependency management**: Handles complex model requirements automatically\n",
+ "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
+ ]
},
- "id": "cQ7WOUKCR_4h",
- "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
- },
- "outputs": [
{
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2SumVjnLj_C"
},
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " TimeCopilot | \n",
- " 220437.833920 | \n",
- " 220437.833920 | \n",
- " 239.903438 | \n",
- " 1.916661 | \n",
- " 0.058617 | \n",
- " 0.058292 | \n",
- " 14.666592 | \n",
- " 469.508077 | \n",
- " 0.085537 | \n",
- " 0.043707 | \n",
- " 0.034997 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " TimeCopilot | \n",
- " 54.628522 | \n",
- " 54.628522 | \n",
- " 4.459038 | \n",
- " 0.444425 | \n",
- " 0.385657 | \n",
- " 0.580057 | \n",
- " 2.696251 | \n",
- " 7.391111 | \n",
- " 0.398400 | \n",
- " 0.240354 | \n",
- " 0.186401 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " TimeCopilot | \n",
- " 71.800877 | \n",
- " 71.800877 | \n",
- " 4.851640 | \n",
- " 0.488632 | \n",
- " 0.470714 | \n",
- " 0.757992 | \n",
- " 3.374162 | \n",
- " 8.473540 | \n",
- " 0.513086 | \n",
- " 0.293774 | \n",
- " 0.232035 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " TimeCopilot | \n",
- " 83.786483 | \n",
- " 83.786483 | \n",
- " 5.340595 | \n",
- " 0.566997 | \n",
- " 0.619369 | \n",
- " 0.782812 | \n",
- " 4.585122 | \n",
- " 9.153496 | \n",
- " 0.559122 | \n",
- " 0.326219 | \n",
- " 0.261100 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "source": [
+ "## Requirements and Installation\n",
+ "\n",
+ "Install the TimeCopilot library:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "yQpa1NOvLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"timecopilot>=0.0.22\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tVsga7ogLj_D"
+ },
+ "source": [
+ "## Dataset Setup\n",
+ "\n",
+ "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "mriqHxfOLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "\n",
+ "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
+ "# Handles the complete benchmark dataset with all 97 configurations\n",
+ "storage_path = \"./data/gift-eval\"\n",
+ "GIFTEval.download_data(storage_path=storage_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-SHX7gAtLj_D"
+ },
+ "source": [
+ "## Model Implementation\n",
+ "\n",
+ "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "iWYKncn03jVy"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.ensembles.median import MedianEnsemble\n",
+ "from timecopilot.models.foundation.chronos import Chronos\n",
+ "from timecopilot.models.foundation.timesfm import TimesFM\n",
+ "from timecopilot.models.foundation.tirex import TiRex\n",
+ "from timecopilot.models.utils.forecaster import Forecaster\n",
+ "\n",
+ "batch_size = 64\n",
+ "\n",
+ "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
+ "# Automatically handles dependency conflicts and GPU memory management\n",
+ "ensemble = MedianEnsemble(\n",
+ " models=[\n",
+ " # Each model uses TimeCopilot's unified interface despite different architectures\n",
+ " Chronos(\n",
+ " repo_id=\"amazon/chronos-2\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TimesFM(\n",
+ " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TiRex(\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " ],\n",
+ " alias=\"TimeCopilot\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sCjZScu5Lj_E"
+ },
+ "source": [
+ "## Evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yPKpn4e04KZD"
+ },
+ "source": [
+ "### Defining the evaluator"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2YcjoDF5NH7"
+ },
+ "source": [
+ "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "RMvE9Cx9Lj_D"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
+ "\n",
+ "\n",
+ "def evaluate_forecaster(\n",
+ " forecaster: Forecaster,\n",
+ " dataset_name: str,\n",
+ " term: str,\n",
+ " output_path: str,\n",
+ " storage_path: str,\n",
+ " ):\n",
+ " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
+ "\n",
+ " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
+ " gifteval = GIFTEval(\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=output_path,\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # GluonTS wrapper for GIFT-Eval compatibility\n",
+ " # It can receive any Forecaster from TimeCopilot\n",
+ " predictor = GluonTSPredictor(\n",
+ " forecaster=forecaster,\n",
+ " max_length=4_096,\n",
+ " batch_size=1_024,\n",
+ " )\n",
+ "\n",
+ " # Run evaluation with GIFT-Eval's standardized metrics\n",
+ " gifteval.evaluate_predictor(predictor, batch_size=512)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ajc2VPQl5cPY"
+ },
+ "source": [
+ "### Performing evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74XuerNA5rWU"
+ },
+ "source": [
+ "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "R41M3rDeLj_E"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "\n",
+ "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
+ " combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ " ]\n",
+ "\n",
+ " for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=ensemble,\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=\"./results/timecopilot\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # Load consolidated results in GIFT-Eval format\n",
+ " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "id": "cQ7WOUKCR_4h",
+ "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df"
+ },
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " TimeCopilot | \n",
+ " 220437.833920 | \n",
+ " 220437.833920 | \n",
+ " 239.903438 | \n",
+ " 1.916661 | \n",
+ " 0.058617 | \n",
+ " 0.058292 | \n",
+ " 14.666592 | \n",
+ " 469.508077 | \n",
+ " 0.085537 | \n",
+ " 0.043707 | \n",
+ " 0.034997 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " TimeCopilot | \n",
+ " 54.628522 | \n",
+ " 54.628522 | \n",
+ " 4.459038 | \n",
+ " 0.444425 | \n",
+ " 0.385657 | \n",
+ " 0.580057 | \n",
+ " 2.696251 | \n",
+ " 7.391111 | \n",
+ " 0.398400 | \n",
+ " 0.240354 | \n",
+ " 0.186401 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " TimeCopilot | \n",
+ " 71.800877 | \n",
+ " 71.800877 | \n",
+ " 4.851640 | \n",
+ " 0.488632 | \n",
+ " 0.470714 | \n",
+ " 0.757992 | \n",
+ " 3.374162 | \n",
+ " 8.473540 | \n",
+ " 0.513086 | \n",
+ " 0.293774 | \n",
+ " 0.232035 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " TimeCopilot | \n",
+ " 83.786483 | \n",
+ " 83.786483 | \n",
+ " 5.340595 | \n",
+ " 0.566997 | \n",
+ " 0.619369 | \n",
+ " 0.782812 | \n",
+ " 4.585122 | \n",
+ " 9.153496 | \n",
+ " 0.559122 | \n",
+ " 0.326219 | \n",
+ " 0.261100 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
+ "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
+ "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
+ "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 220437.833920 239.903438 1.916661 \n",
+ "1 54.628522 4.459038 0.444425 \n",
+ "2 71.800877 4.851640 0.488632 \n",
+ "3 83.786483 5.340595 0.566997 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.058617 0.058292 14.666592 \n",
+ "1 0.385657 0.580057 2.696251 \n",
+ "2 0.470714 0.757992 3.374162 \n",
+ "3 0.619369 0.782812 4.585122 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 469.508077 0.085537 0.043707 \n",
+ "1 7.391111 0.398400 0.240354 \n",
+ "2 8.473540 0.513086 0.293774 \n",
+ "3 9.153496 0.559122 0.326219 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.034997 Econ/Fin 1 \n",
+ "1 0.186401 Web/CloudOps 7 \n",
+ "2 0.232035 Web/CloudOps 7 \n",
+ "3 0.261100 Web/CloudOps 7 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
- "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
- "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
- "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 220437.833920 239.903438 1.916661 \n",
- "1 54.628522 4.459038 0.444425 \n",
- "2 71.800877 4.851640 0.488632 \n",
- "3 83.786483 5.340595 0.566997 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.058617 0.058292 14.666592 \n",
- "1 0.385657 0.580057 2.696251 \n",
- "2 0.470714 0.757992 3.374162 \n",
- "3 0.619369 0.782812 4.585122 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 469.508077 0.085537 0.043707 \n",
- "1 7.391111 0.398400 0.240354 \n",
- "2 8.473540 0.513086 0.293774 \n",
- "3 9.153496 0.559122 0.326219 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.034997 Econ/Fin 1 \n",
- "1 0.186401 Web/CloudOps 7 \n",
- "2 0.232035 Web/CloudOps 7 \n",
- "3 0.261100 Web/CloudOps 7 "
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " from IPython.display import display\n",
+ "\n",
+ " display(eval_df)"
]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "if torch.cuda.is_available():\n",
- " from IPython.display import display\n",
- "\n",
- " display(eval_df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7JCiHenv6Dma"
- },
- "source": [
- "You can access the complete combination of datasets with the following:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "RmmMtHpA6HIu"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
},
- "id": "2WBJ-wjv6Kz6",
- "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
- },
- "outputs": [
{
- "data": {
- "text/plain": [
- "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7JCiHenv6Dma"
+ },
+ "source": [
+ "You can access the complete combination of datasets with the following:"
]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "DATASETS_WITH_TERMS[:3]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
},
- "id": "7I9OQThW6OD8",
- "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
- },
- "outputs": [
{
- "data": {
- "text/plain": [
- "97"
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "RmmMtHpA6HIu"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(DATASETS_WITH_TERMS)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BXupvNSFVWhG"
- },
- "source": [
- "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xK8KTPic6UzR"
- },
- "source": [
- "## Reproducibility statement"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "g0-oVisu6XX3"
- },
- "source": [
- "The TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed considering reproducibility as one of its main features. The library can replicate the official results provided by the mantainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets are tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0kJwJ8mX6TH2"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.stats import SeasonalNaive\n",
- "\n",
- "combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- "]\n",
- "\n",
- "for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=\"./results/seasonal_naive\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
},
- "id": "0S-Oog_2UTCI",
- "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
- },
- "outputs": [
{
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df_sn"
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2WBJ-wjv6Kz6",
+ "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
},
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
+ "source": [
+ "DATASETS_WITH_TERMS[:3]"
]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "eval_df_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "id": "D1T6ar_H8Zo8"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = pd.read_csv(\n",
- " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "id": "NETa8_6Y8ip-"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
},
- "id": "hkH2NLKMUVii",
- "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
- },
- "outputs": [
{
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "official_eval_sn"
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "7I9OQThW6OD8",
+ "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
},
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "97"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
+ "source": [
+ "len(DATASETS_WITH_TERMS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BXupvNSFVWhG"
+ },
+ "source": [
+ "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xK8KTPic6UzR"
+ },
+ "source": [
+ "## Reproducibility statement"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "g0-oVisu6XX3"
+ },
+ "source": [
+ "TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed with reproducibility as one of its main features. The library can replicate the official results provided by the maintainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets is tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0kJwJ8mX6TH2"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.stats import SeasonalNaive\n",
+ "\n",
+ "combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ "]\n",
+ "\n",
+ "for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=f\"./results/seasonal_naive\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "id": "0S-Oog_2UTCI",
+ "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df_sn"
+ },
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eval_df_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "id": "D1T6ar_H8Zo8"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = pd.read_csv(\n",
+ " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "NETa8_6Y8ip-"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
+ },
+ "id": "hkH2NLKMUVii",
+ "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "official_eval_sn"
+ },
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "official_eval_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "id": "OCifh_5D9B05"
+ },
+ "outputs": [],
+ "source": [
+ "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0wapKMgTFScM"
+ },
+ "source": [
+ "## Changelog\n",
+ "\n",
+ "### **2025-11-06**\n",
+ "\n",
+ "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
+ "\n",
+ "### **2025-08-05**\n",
+ "\n",
+ "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
}
- ],
- "source": [
- "official_eval_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "id": "OCifh_5D9B05"
- },
- "outputs": [],
- "source": [
- "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0wapKMgTFScM"
- },
- "source": [
- "## Changelog\n",
- "\n",
- "### **2025-11-06**\n",
- "\n",
- "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
- "\n",
- "### **2025-08-05**\n",
- "\n",
- "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": [],
- "runtime_attributes": {
- "runtime_version": "2025.07"
- }
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "runtime_attributes": {
+ "runtime_version": "2025.07"
+ }
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.12"
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/docs/examples/google-llms.ipynb b/docs/examples/google-llms.ipynb
index 16f5291c..7abb7bc6 100644
--- a/docs/examples/google-llms.ipynb
+++ b/docs/examples/google-llms.ipynb
@@ -38,12 +38,11 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
"nest_asyncio.apply()\n",
"\n",
- "import pandas as pd\n",
+ "from timecopilot import TimeCopilot\n",
"\n",
- "from timecopilot import TimeCopilot"
+ "import pandas as pd"
]
},
{
@@ -188,6 +187,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "from pydantic_ai import Agent\n",
"from pydantic_ai.models.google import GoogleModel\n",
"from pydantic_ai.providers.google import GoogleProvider\n",
"\n",
diff --git a/docs/examples/llm-providers.ipynb b/docs/examples/llm-providers.ipynb
index d3f9afce..bab596fa 100644
--- a/docs/examples/llm-providers.ipynb
+++ b/docs/examples/llm-providers.ipynb
@@ -35,7 +35,6 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
"nest_asyncio.apply()"
]
},
@@ -47,7 +46,6 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "\n",
"from timecopilot import TimeCopilot\n"
]
},
diff --git a/docs/examples/sktime.ipynb b/docs/examples/sktime.ipynb
index dae92ba6..276e40b6 100644
--- a/docs/examples/sktime.ipynb
+++ b/docs/examples/sktime.ipynb
@@ -26,12 +26,11 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
- "\n",
"nest_asyncio.apply()\n",
"\n",
- "import pandas as pd\n",
- "\n",
- "import timecopilot"
+ "import timecopilot\n",
+ "import sktime as skt\n",
+ "import pandas as pd"
]
},
{
@@ -54,7 +53,6 @@
"outputs": [],
"source": [
"from sktime.forecasting.trend import TrendForecaster\n",
- "\n",
"from timecopilot.models.adapters.sktime import SKTimeAdapter\n",
"\n",
"trend_forecaster = TrendForecaster()\n",
From 8fabf7538b02a9e82d0b1902b7e75fa71444b410 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Mon, 19 Jan 2026 10:32:19 +0530
Subject: [PATCH 15/21] Apply ruff formatting fixes
Applied ruff-format changes to satisfy pre-commit checks.
No functional changes.
---
docs/examples/agent-quickstart.ipynb | 4 +-
...maly-detection-forecaster-quickstart.ipynb | 3 +-
docs/examples/aws-bedrock.ipynb | 9 +-
docs/examples/chronos-family.ipynb | 12 +-
docs/examples/cryptocurrency-quickstart.ipynb | 85 +-
docs/examples/forecaster-quickstart.ipynb | 11 +-
docs/examples/gift-eval.ipynb | 3516 +++++++++--------
docs/examples/google-llms.ipynb | 7 +-
docs/examples/llm-providers.ipynb | 18 +-
docs/examples/sktime.ipynb | 6 +-
...ndation-models-comparison-quickstart.ipynb | 27 +-
tests/test_agent.py | 4 +-
12 files changed, 1853 insertions(+), 1849 deletions(-)
diff --git a/docs/examples/agent-quickstart.ipynb b/docs/examples/agent-quickstart.ipynb
index 92114456..e1f77b63 100644
--- a/docs/examples/agent-quickstart.ipynb
+++ b/docs/examples/agent-quickstart.ipynb
@@ -16,6 +16,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()"
]
},
@@ -157,11 +158,10 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tc = TimeCopilot(\n",
" llm=\"openai:gpt-4o\",\n",
" retries=3,\n",
- ")\n"
+ ")"
]
},
{
diff --git a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
index 18f6134a..ee080351 100644
--- a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
+++ b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb
@@ -183,7 +183,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/the_anomaly_tour.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df\n"
+ "df"
]
},
{
@@ -222,7 +222,6 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb
index 52f44523..9b691e08 100644
--- a/docs/examples/aws-bedrock.ipynb
+++ b/docs/examples/aws-bedrock.ipynb
@@ -37,6 +37,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()\n",
"\n",
"from timecopilot import TimeCopilot\n",
@@ -164,7 +165,7 @@
"outputs": [],
"source": [
"tc = TimeCopilot(\n",
- " llm='bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',\n",
+ " llm=\"bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0\",\n",
")"
]
},
@@ -187,9 +188,7 @@
"source": [
"from pydantic_ai.models.bedrock import BedrockConverseModel\n",
"\n",
- "model = BedrockConverseModel(\n",
- " 'us.anthropic.claude-3-5-sonnet-20241022-v2:0'\n",
- ")\n",
+ "model = BedrockConverseModel(\"us.anthropic.claude-3-5-sonnet-20241022-v2:0\")\n",
"tc = TimeCopilot(\n",
" llm=model,\n",
")"
@@ -219,7 +218,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")\n"
+ "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")"
]
},
{
diff --git a/docs/examples/chronos-family.ipynb b/docs/examples/chronos-family.ipynb
index f9bb2021..84174108 100644
--- a/docs/examples/chronos-family.ipynb
+++ b/docs/examples/chronos-family.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -241,7 +241,7 @@
"outputs": [],
"source": [
"level = [20, 40, 60, 80]\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
]
},
{
@@ -638,12 +638,14 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]), \n",
- " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
+ " cv_df.drop(columns=[\"cutoff\"]),\n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
+ " 3\n",
+ ")"
]
}
],
diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb
index 4d4a39a0..e54bf0ef 100644
--- a/docs/examples/cryptocurrency-quickstart.ipynb
+++ b/docs/examples/cryptocurrency-quickstart.ipynb
@@ -63,7 +63,7 @@
"outputs": [],
"source": [
"files = os.listdir(path)\n",
- "files = [path+'/'+x for x in files]"
+ "files = [path + \"/\" + x for x in files]"
]
},
{
@@ -198,18 +198,18 @@
"# Read all filez and set them up to the readable structure for timecopilot\n",
"for file in files:\n",
" temp_df = pd.read_csv(file)\n",
- " temp_df = temp_df[['Symbol','Date','Close']]\n",
- " temp_df.columns = ['unique_id','ds','y']\n",
- " big_df = pd.concat([big_df,temp_df])\n",
+ " temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n",
+ " temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n",
+ " big_df = pd.concat([big_df, temp_df])\n",
"\n",
"big_df = big_df.reset_index(drop=True)\n",
"big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n",
"\n",
- "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. \n",
+ "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n",
"# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n",
- "cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n",
- "big_df=big_df[big_df.unique_id.isin(cryptos)]\n",
- "big_df=big_df.reset_index(drop=True)\n",
+ "cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n",
+ "big_df = big_df[big_df.unique_id.isin(cryptos)]\n",
+ "big_df = big_df.reset_index(drop=True)\n",
"big_df"
]
},
@@ -341,6 +341,7 @@
" df_out.loc[idx, col] = np.nan\n",
" return df_out\n",
"\n",
+ "\n",
"df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n",
"df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n",
"print(df_missing)"
@@ -709,12 +710,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
" # (anomalies_df.unique_id=='SOL') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
- " (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ " (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -954,12 +957,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
- " (anomalies_df.unique_id=='ADA') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) | \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
- " (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
+ " (anomalies_df.unique_id == \"ADA\")\n",
+ " & (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " | (anomalies_df[\"Theta-anomaly\"] == True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1199,12 +1204,14 @@
}
],
"source": [
- "anomaly_summary_xlm=anomalies_df[\n",
- " (anomalies_df.unique_id=='ADA') & \\\n",
- " ((anomalies_df['Chronos-anomaly']==True) & \\\n",
- " (anomalies_df['SeasonalNaive-anomaly']==True) \\\n",
- " # (anomalies_df['Theta-anomaly']==True)\n",
- " )].reset_index(drop=True)\n",
+ "anomaly_summary_xlm = anomalies_df[\n",
+ " (anomalies_df.unique_id == \"ADA\")\n",
+ " & (\n",
+ " (anomalies_df[\"Chronos-anomaly\"] == True)\n",
+ " & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
+ " # (anomalies_df['Theta-anomaly']==True)\n",
+ " )\n",
+ "].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
@@ -1241,12 +1248,12 @@
"source": [
"tcf1 = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(), \n",
+ " AutoARIMA(),\n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
" Theta(),\n",
- " AutoETS(), \n",
- " Moirai(), \n",
- " Prophet(), \n",
+ " AutoETS(),\n",
+ " Moirai(),\n",
+ " Prophet(),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -1259,7 +1266,7 @@
"metadata": {},
"outputs": [],
"source": [
- "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])"
+ "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])"
]
},
{
@@ -1303,9 +1310,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
- " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
- " .reset_index(drop=True)"
+ "eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
+ " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
+ "].reset_index(drop=True)"
]
},
{
@@ -1345,9 +1352,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
- " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
- " .reset_index(drop=True)"
+ "eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
+ " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
+ "].reset_index(drop=True)"
]
},
{
@@ -1515,9 +1522,9 @@
}
],
"source": [
- "compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n",
- "compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n",
- "print(compare['dif'].sum())"
+ "compare = eth_fcst_normal.merge(eth_fcst_missing, on=[\"ds\", \"unique_id\"])\n",
+ "compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n",
+ "print(compare[\"dif\"].sum())"
]
},
{
diff --git a/docs/examples/forecaster-quickstart.ipynb b/docs/examples/forecaster-quickstart.ipynb
index 6239177a..88b27704 100644
--- a/docs/examples/forecaster-quickstart.ipynb
+++ b/docs/examples/forecaster-quickstart.ipynb
@@ -131,7 +131,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -170,13 +170,12 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
- " AutoARIMA(), \n",
- " AutoETS(), \n",
- " Moirai(), \n",
- " Prophet(), \n",
+ " AutoARIMA(),\n",
+ " AutoETS(),\n",
+ " Moirai(),\n",
+ " Prophet(),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
diff --git a/docs/examples/gift-eval.ipynb b/docs/examples/gift-eval.ipynb
index 70347b4d..7b499323 100644
--- a/docs/examples/gift-eval.ipynb
+++ b/docs/examples/gift-eval.ipynb
@@ -1,1782 +1,1784 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "azZ6BczQLj_B"
- },
- "source": [
- "# Foundation Model Ensemble (GIFT-Eval)\n",
- "\n",
- "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
- "\n",
- "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
- "\n",
- "\n",
- "\n",
- "## Model Description\n",
- "\n",
- "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
- "\n",
- "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
- "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
- "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
- "\n",
- "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
- "\n",
- "## TimeCopilot's Key Features\n",
- "\n",
- "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
- "- **Ensemble capabilities**: Built-in ensemble methods\n",
- "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
- "- **Dependency management**: Handles complex model requirements automatically\n",
- "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2SumVjnLj_C"
- },
- "source": [
- "## Requirements and Installation\n",
- "\n",
- "Install TimeCopilot library:\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "yQpa1NOvLj_D"
- },
- "outputs": [],
- "source": [
- "%pip install \"timecopilot>=0.0.22\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "tVsga7ogLj_D"
- },
- "source": [
- "## Dataset Setup\n",
- "\n",
- "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "mriqHxfOLj_D"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "\n",
- "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
- "# Handles the complete benchmark dataset with all 97 configurations\n",
- "storage_path = \"./data/gift-eval\"\n",
- "GIFTEval.download_data(storage_path=storage_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-SHX7gAtLj_D"
- },
- "source": [
- "## Model Implementation\n",
- "\n",
- "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "iWYKncn03jVy"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.ensembles.median import MedianEnsemble\n",
- "from timecopilot.models.foundation.chronos import Chronos\n",
- "from timecopilot.models.foundation.timesfm import TimesFM\n",
- "from timecopilot.models.foundation.tirex import TiRex\n",
- "from timecopilot.models.utils.forecaster import Forecaster\n",
- "\n",
- "batch_size = 64\n",
- "\n",
- "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
- "# Automatically handles dependency conflicts and GPU memory management\n",
- "ensemble = MedianEnsemble(\n",
- " models=[\n",
- " # Each model uses TimeCopilot's unified interface despite different architectures\n",
- " Chronos(\n",
- " repo_id=\"amazon/chronos-2\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TimesFM(\n",
- " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " TiRex(\n",
- " batch_size=batch_size,\n",
- " ),\n",
- " ],\n",
- " alias=\"TimeCopilot\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sCjZScu5Lj_E"
- },
- "source": [
- "## Evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yPKpn4e04KZD"
- },
- "source": [
- "### Defining the evaluator"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M2YcjoDF5NH7"
- },
- "source": [
- "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "RMvE9Cx9Lj_D"
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from timecopilot.gift_eval.eval import GIFTEval\n",
- "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
- "\n",
- "\n",
- "def evaluate_forecaster(\n",
- " forecaster: Forecaster,\n",
- " dataset_name: str,\n",
- " term: str,\n",
- " output_path: str,\n",
- " storage_path: str,\n",
- " ):\n",
- " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
- "\n",
- " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
- " gifteval = GIFTEval(\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=output_path,\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # GluonTS wrapper for GIFT-Eval compatibility\n",
- " # It can receive any Forecaster from TimeCopilot\n",
- " predictor = GluonTSPredictor(\n",
- " forecaster=forecaster,\n",
- " max_length=4_096,\n",
- " batch_size=1_024,\n",
- " )\n",
- "\n",
- " # Run evaluation with GIFT-Eval's standardized metrics\n",
- " gifteval.evaluate_predictor(predictor, batch_size=512)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ajc2VPQl5cPY"
- },
- "source": [
- "### Performing evaluation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "74XuerNA5rWU"
- },
- "source": [
- "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true,
- "id": "R41M3rDeLj_E"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "\n",
- "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
- " combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- " ]\n",
- "\n",
- " for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=ensemble,\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=f\"./results/timecopilot\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "\n",
- " # Load consolidated results in GIFT-Eval format\n",
- " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "cQ7WOUKCR_4h",
- "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
- },
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df"
- },
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " TimeCopilot | \n",
- " 220437.833920 | \n",
- " 220437.833920 | \n",
- " 239.903438 | \n",
- " 1.916661 | \n",
- " 0.058617 | \n",
- " 0.058292 | \n",
- " 14.666592 | \n",
- " 469.508077 | \n",
- " 0.085537 | \n",
- " 0.043707 | \n",
- " 0.034997 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " TimeCopilot | \n",
- " 54.628522 | \n",
- " 54.628522 | \n",
- " 4.459038 | \n",
- " 0.444425 | \n",
- " 0.385657 | \n",
- " 0.580057 | \n",
- " 2.696251 | \n",
- " 7.391111 | \n",
- " 0.398400 | \n",
- " 0.240354 | \n",
- " 0.186401 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " TimeCopilot | \n",
- " 71.800877 | \n",
- " 71.800877 | \n",
- " 4.851640 | \n",
- " 0.488632 | \n",
- " 0.470714 | \n",
- " 0.757992 | \n",
- " 3.374162 | \n",
- " 8.473540 | \n",
- " 0.513086 | \n",
- " 0.293774 | \n",
- " 0.232035 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " TimeCopilot | \n",
- " 83.786483 | \n",
- " 83.786483 | \n",
- " 5.340595 | \n",
- " 0.566997 | \n",
- " 0.619369 | \n",
- " 0.782812 | \n",
- " 4.585122 | \n",
- " 9.153496 | \n",
- " 0.559122 | \n",
- " 0.326219 | \n",
- " 0.261100 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
- "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
- "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
- "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 220437.833920 239.903438 1.916661 \n",
- "1 54.628522 4.459038 0.444425 \n",
- "2 71.800877 4.851640 0.488632 \n",
- "3 83.786483 5.340595 0.566997 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.058617 0.058292 14.666592 \n",
- "1 0.385657 0.580057 2.696251 \n",
- "2 0.470714 0.757992 3.374162 \n",
- "3 0.619369 0.782812 4.585122 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 469.508077 0.085537 0.043707 \n",
- "1 7.391111 0.398400 0.240354 \n",
- "2 8.473540 0.513086 0.293774 \n",
- "3 9.153496 0.559122 0.326219 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.034997 Econ/Fin 1 \n",
- "1 0.186401 Web/CloudOps 7 \n",
- "2 0.232035 Web/CloudOps 7 \n",
- "3 0.261100 Web/CloudOps 7 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "if torch.cuda.is_available():\n",
- " from IPython.display import display\n",
- "\n",
- " display(eval_df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7JCiHenv6Dma"
- },
- "source": [
- "You can access the complete combination of datasets with the following:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "RmmMtHpA6HIu"
- },
- "outputs": [],
- "source": [
- "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "2WBJ-wjv6Kz6",
- "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "DATASETS_WITH_TERMS[:3]"
- ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "azZ6BczQLj_B"
+ },
+ "source": [
+ "# Foundation Model Ensemble (GIFT-Eval)\n",
+ "\n",
+ "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n",
+ "\n",
+ "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Model Description\n",
+ "\n",
+ "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n",
+ "\n",
+ "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n",
+ "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n",
+ "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n",
+ "\n",
+ "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n",
+ "\n",
+ "## TimeCopilot's Key Features\n",
+ "\n",
+ "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n",
+ "- **Ensemble capabilities**: Built-in ensemble methods\n",
+ "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n",
+ "- **Dependency management**: Handles complex model requirements automatically\n",
+ "- **GPU efficiency**: Optimized memory sharing and multi‑model execution"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2SumVjnLj_C"
+ },
+ "source": [
+ "## Requirements and Installation\n",
+ "\n",
+ "Install TimeCopilot library:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "yQpa1NOvLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"timecopilot>=0.0.22\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tVsga7ogLj_D"
+ },
+ "source": [
+ "## Dataset Setup\n",
+ "\n",
+ "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "mriqHxfOLj_D"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "\n",
+ "# TimeCopilot's built-in GIFT-Eval dataset downloader\n",
+ "# Handles the complete benchmark dataset with all 97 configurations\n",
+ "storage_path = \"./data/gift-eval\"\n",
+ "GIFTEval.download_data(storage_path=storage_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-SHX7gAtLj_D"
+ },
+ "source": [
+ "## Model Implementation\n",
+ "\n",
+ "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "iWYKncn03jVy"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.ensembles.median import MedianEnsemble\n",
+ "from timecopilot.models.foundation.chronos import Chronos\n",
+ "from timecopilot.models.foundation.timesfm import TimesFM\n",
+ "from timecopilot.models.foundation.tirex import TiRex\n",
+ "from timecopilot.models.utils.forecaster import Forecaster\n",
+ "\n",
+ "batch_size = 64\n",
+ "\n",
+ "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n",
+ "# Automatically handles dependency conflicts and GPU memory management\n",
+ "ensemble = MedianEnsemble(\n",
+ " models=[\n",
+ " # Each model uses TimeCopilot's unified interface despite different architectures\n",
+ " Chronos(\n",
+ " repo_id=\"amazon/chronos-2\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TimesFM(\n",
+ " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " TiRex(\n",
+ " batch_size=batch_size,\n",
+ " ),\n",
+ " ],\n",
+ " alias=\"TimeCopilot\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sCjZScu5Lj_E"
+ },
+ "source": [
+ "## Evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yPKpn4e04KZD"
+ },
+ "source": [
+ "### Defining the evaluator"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M2YcjoDF5NH7"
+ },
+ "source": [
+ "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "RMvE9Cx9Lj_D"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from timecopilot.gift_eval.eval import GIFTEval\n",
+ "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n",
+ "\n",
+ "\n",
+ "def evaluate_forecaster(\n",
+ " forecaster: Forecaster,\n",
+ " dataset_name: str,\n",
+ " term: str,\n",
+ " output_path: str,\n",
+ " storage_path: str,\n",
+ "):\n",
+ " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n",
+ "\n",
+ " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n",
+ " gifteval = GIFTEval(\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=output_path,\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # GluonTS wrapper for GIFT-Eval compatibility\n",
+ " # It can receive any Forecaster from TimeCopilot\n",
+ " predictor = GluonTSPredictor(\n",
+ " forecaster=forecaster,\n",
+ " max_length=4_096,\n",
+ " batch_size=1_024,\n",
+ " )\n",
+ "\n",
+ " # Run evaluation with GIFT-Eval's standardized metrics\n",
+ " gifteval.evaluate_predictor(predictor, batch_size=512)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ajc2VPQl5cPY"
+ },
+ "source": [
+ "### Performing evaluation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74XuerNA5rWU"
+ },
+ "source": [
+ "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true,
+ "id": "R41M3rDeLj_E"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "\n",
+ "if torch.cuda.is_available(): # remove if you want to run on CPU\n",
+ " combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ " ]\n",
+ "\n",
+ " for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=ensemble,\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=f\"./results/timecopilot\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "\n",
+ " # Load consolidated results in GIFT-Eval format\n",
+ " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "cQ7WOUKCR_4h",
+ "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7I9OQThW6OD8",
- "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df"
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "97"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " TimeCopilot | \n",
+ " 220437.833920 | \n",
+ " 220437.833920 | \n",
+ " 239.903438 | \n",
+ " 1.916661 | \n",
+ " 0.058617 | \n",
+ " 0.058292 | \n",
+ " 14.666592 | \n",
+ " 469.508077 | \n",
+ " 0.085537 | \n",
+ " 0.043707 | \n",
+ " 0.034997 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " TimeCopilot | \n",
+ " 54.628522 | \n",
+ " 54.628522 | \n",
+ " 4.459038 | \n",
+ " 0.444425 | \n",
+ " 0.385657 | \n",
+ " 0.580057 | \n",
+ " 2.696251 | \n",
+ " 7.391111 | \n",
+ " 0.398400 | \n",
+ " 0.240354 | \n",
+ " 0.186401 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " TimeCopilot | \n",
+ " 71.800877 | \n",
+ " 71.800877 | \n",
+ " 4.851640 | \n",
+ " 0.488632 | \n",
+ " 0.470714 | \n",
+ " 0.757992 | \n",
+ " 3.374162 | \n",
+ " 8.473540 | \n",
+ " 0.513086 | \n",
+ " 0.293774 | \n",
+ " 0.232035 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " TimeCopilot | \n",
+ " 83.786483 | \n",
+ " 83.786483 | \n",
+ " 5.340595 | \n",
+ " 0.566997 | \n",
+ " 0.619369 | \n",
+ " 0.782812 | \n",
+ " 4.585122 | \n",
+ " 9.153496 | \n",
+ " 0.559122 | \n",
+ " 0.326219 | \n",
+ " 0.261100 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "len(DATASETS_WITH_TERMS)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BXupvNSFVWhG"
- },
- "source": [
- "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xK8KTPic6UzR"
- },
- "source": [
- "## Reproducibility statement"
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short TimeCopilot 220437.833920 \n",
+ "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n",
+ "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n",
+ "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 220437.833920 239.903438 1.916661 \n",
+ "1 54.628522 4.459038 0.444425 \n",
+ "2 71.800877 4.851640 0.488632 \n",
+ "3 83.786483 5.340595 0.566997 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.058617 0.058292 14.666592 \n",
+ "1 0.385657 0.580057 2.696251 \n",
+ "2 0.470714 0.757992 3.374162 \n",
+ "3 0.619369 0.782812 4.585122 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 469.508077 0.085537 0.043707 \n",
+ "1 7.391111 0.398400 0.240354 \n",
+ "2 8.473540 0.513086 0.293774 \n",
+ "3 9.153496 0.559122 0.326219 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.034997 Econ/Fin 1 \n",
+ "1 0.186401 Web/CloudOps 7 \n",
+ "2 0.232035 Web/CloudOps 7 \n",
+ "3 0.261100 Web/CloudOps 7 "
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " from IPython.display import display\n",
+ "\n",
+ " display(eval_df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7JCiHenv6Dma"
+ },
+ "source": [
+ "You can access the complete list of dataset and term combinations with the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "RmmMtHpA6HIu"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "2WBJ-wjv6Kz6",
+ "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "g0-oVisu6XX3"
- },
- "source": [
- "The TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed considering reproducibility as one of its main features. The library can replicate the official results provided by the mantainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets are tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
+ "data": {
+ "text/plain": [
+ "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]"
]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "DATASETS_WITH_TERMS[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "7I9OQThW6OD8",
+ "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0kJwJ8mX6TH2"
- },
- "outputs": [],
- "source": [
- "from timecopilot.models.stats import SeasonalNaive\n",
- "\n",
- "combinations = [\n",
- " (\"m4_weekly\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"short\"),\n",
- " (\"bizitobs_l2c/H\", \"medium\"),\n",
- " (\"bizitobs_l2c/H\", \"long\"),\n",
- "]\n",
- "\n",
- "for dataset_name, term in combinations:\n",
- " evaluate_forecaster(\n",
- " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
- " dataset_name=dataset_name,\n",
- " term=term,\n",
- " output_path=f\"./results/seasonal_naive\",\n",
- " storage_path=storage_path,\n",
- " )\n",
- "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
+ "data": {
+ "text/plain": [
+ "97"
]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(DATASETS_WITH_TERMS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BXupvNSFVWhG"
+ },
+ "source": [
+ "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xK8KTPic6UzR"
+ },
+ "source": [
+ "## Reproducibility statement"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "g0-oVisu6XX3"
+ },
+ "source": [
+ "TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed with reproducibility as one of its main features. The library can replicate the official results provided by the maintainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets is tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0kJwJ8mX6TH2"
+ },
+ "outputs": [],
+ "source": [
+ "from timecopilot.models.stats import SeasonalNaive\n",
+ "\n",
+ "combinations = [\n",
+ " (\"m4_weekly\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"short\"),\n",
+ " (\"bizitobs_l2c/H\", \"medium\"),\n",
+ " (\"bizitobs_l2c/H\", \"long\"),\n",
+ "]\n",
+ "\n",
+ "for dataset_name, term in combinations:\n",
+ " evaluate_forecaster(\n",
+ " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n",
+ " dataset_name=dataset_name,\n",
+ " term=term,\n",
+ " output_path=f\"./results/seasonal_naive\",\n",
+ " storage_path=storage_path,\n",
+ " )\n",
+ "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "0S-Oog_2UTCI",
+ "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "0S-Oog_2UTCI",
- "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "eval_df_sn"
},
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 
4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "eval_df_sn"
- },
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "eval_df_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "id": "D1T6ar_H8Zo8"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = pd.read_csv(\n",
- " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "id": "NETa8_6Y8ip-"
- },
- "outputs": [],
- "source": [
- "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()"
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eval_df_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "id": "D1T6ar_H8Zo8"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = pd.read_csv(\n",
+ " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "NETa8_6Y8ip-"
+ },
+ "outputs": [],
+ "source": [
+ "official_eval_sn = (\n",
+ " official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 195
},
+ "id": "hkH2NLKMUVii",
+ "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 195
- },
- "id": "hkH2NLKMUVii",
- "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "official_eval_sn"
},
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
- "type": "dataframe",
- "variable_name": "official_eval_sn"
- },
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dataset | \n",
- " model | \n",
- " eval_metrics/MSE[mean] | \n",
- " eval_metrics/MSE[0.5] | \n",
- " eval_metrics/MAE[0.5] | \n",
- " eval_metrics/MASE[0.5] | \n",
- " eval_metrics/MAPE[0.5] | \n",
- " eval_metrics/sMAPE[0.5] | \n",
- " eval_metrics/MSIS | \n",
- " eval_metrics/RMSE[mean] | \n",
- " eval_metrics/NRMSE[mean] | \n",
- " eval_metrics/ND[0.5] | \n",
- " eval_metrics/mean_weighted_sum_quantile_loss | \n",
- " domain | \n",
- " num_variates | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " m4_weekly/W/short | \n",
- " Seasonal_Naive | \n",
- " 453525.145918 | \n",
- " 453525.145918 | \n",
- " 347.991483 | \n",
- " 2.777295 | \n",
- " 0.089373 | \n",
- " 0.091613 | \n",
- " 26.631225 | \n",
- " 673.442756 | \n",
- " 0.122691 | \n",
- " 0.063399 | \n",
- " 0.060870 | \n",
- " Econ/Fin | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " bizitobs_l2c/H/short | \n",
- " Seasonal_Naive | \n",
- " 281.843068 | \n",
- " 281.843068 | \n",
- " 12.531653 | \n",
- " 1.214064 | \n",
- " 1.360590 | \n",
- " 1.138373 | \n",
- " 7.486931 | \n",
- " 16.788182 | \n",
- " 0.904926 | \n",
- " 0.675488 | \n",
- " 0.521168 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " bizitobs_l2c/H/medium | \n",
- " Seasonal_Naive | \n",
- " 456.373289 | \n",
- " 456.373289 | \n",
- " 15.667392 | \n",
- " 1.510286 | \n",
- " 1.691291 | \n",
- " 1.402410 | \n",
- " 18.533654 | \n",
- " 21.362895 | \n",
- " 1.293556 | \n",
- " 0.948684 | \n",
- " 0.904205 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " bizitobs_l2c/H/long | \n",
- " Seasonal_Naive | \n",
- " 309.272222 | \n",
- " 309.272222 | \n",
- " 13.635488 | \n",
- " 1.426054 | \n",
- " 2.438311 | \n",
- " 0.916854 | \n",
- " 22.036198 | \n",
- " 17.586137 | \n",
- " 1.074212 | \n",
- " 0.832895 | \n",
- " 0.941065 | \n",
- " Web/CloudOps | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "text/plain": [
- " dataset model eval_metrics/MSE[mean] \\\n",
- "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
- "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
- "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
- "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
- "\n",
- " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
- "0 453525.145918 347.991483 2.777295 \n",
- "1 281.843068 12.531653 1.214064 \n",
- "2 456.373289 15.667392 1.510286 \n",
- "3 309.272222 13.635488 1.426054 \n",
- "\n",
- " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
- "0 0.089373 0.091613 26.631225 \n",
- "1 1.360590 1.138373 7.486931 \n",
- "2 1.691291 1.402410 18.533654 \n",
- "3 2.438311 0.916854 22.036198 \n",
- "\n",
- " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
- "0 673.442756 0.122691 0.063399 \n",
- "1 16.788182 0.904926 0.675488 \n",
- "2 21.362895 1.293556 0.948684 \n",
- "3 17.586137 1.074212 0.832895 \n",
- "\n",
- " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
- "0 0.060870 Econ/Fin 1 \n",
- "1 0.521168 Web/CloudOps 7 \n",
- "2 0.904205 Web/CloudOps 7 \n",
- "3 0.941065 Web/CloudOps 7 "
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " eval_metrics/MSE[mean] | \n",
+ " eval_metrics/MSE[0.5] | \n",
+ " eval_metrics/MAE[0.5] | \n",
+ " eval_metrics/MASE[0.5] | \n",
+ " eval_metrics/MAPE[0.5] | \n",
+ " eval_metrics/sMAPE[0.5] | \n",
+ " eval_metrics/MSIS | \n",
+ " eval_metrics/RMSE[mean] | \n",
+ " eval_metrics/NRMSE[mean] | \n",
+ " eval_metrics/ND[0.5] | \n",
+ " eval_metrics/mean_weighted_sum_quantile_loss | \n",
+ " domain | \n",
+ " num_variates | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " m4_weekly/W/short | \n",
+ " Seasonal_Naive | \n",
+ " 453525.145918 | \n",
+ " 453525.145918 | \n",
+ " 347.991483 | \n",
+ " 2.777295 | \n",
+ " 0.089373 | \n",
+ " 0.091613 | \n",
+ " 26.631225 | \n",
+ " 673.442756 | \n",
+ " 0.122691 | \n",
+ " 0.063399 | \n",
+ " 0.060870 | \n",
+ " Econ/Fin | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bizitobs_l2c/H/short | \n",
+ " Seasonal_Naive | \n",
+ " 281.843068 | \n",
+ " 281.843068 | \n",
+ " 12.531653 | \n",
+ " 1.214064 | \n",
+ " 1.360590 | \n",
+ " 1.138373 | \n",
+ " 7.486931 | \n",
+ " 16.788182 | \n",
+ " 0.904926 | \n",
+ " 0.675488 | \n",
+ " 0.521168 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " bizitobs_l2c/H/medium | \n",
+ " Seasonal_Naive | \n",
+ " 456.373289 | \n",
+ " 456.373289 | \n",
+ " 15.667392 | \n",
+ " 1.510286 | \n",
+ " 1.691291 | \n",
+ " 1.402410 | \n",
+ " 18.533654 | \n",
+ " 21.362895 | \n",
+ " 1.293556 | \n",
+ " 0.948684 | \n",
+ " 0.904205 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " bizitobs_l2c/H/long | \n",
+ " Seasonal_Naive | \n",
+ " 309.272222 | \n",
+ " 309.272222 | \n",
+ " 13.635488 | \n",
+ " 1.426054 | \n",
+ " 2.438311 | \n",
+ " 0.916854 | \n",
+ " 22.036198 | \n",
+ " 17.586137 | \n",
+ " 1.074212 | \n",
+ " 0.832895 | \n",
+ " 0.941065 | \n",
+ " Web/CloudOps | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "source": [
- "official_eval_sn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "id": "OCifh_5D9B05"
- },
- "outputs": [],
- "source": [
- "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0wapKMgTFScM"
- },
- "source": [
- "## Changelog\n",
- "\n",
- "### **2025-11-06**\n",
- "\n",
- "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
- "\n",
- "### **2025-08-05**\n",
- "\n",
- "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
+ "text/plain": [
+ " dataset model eval_metrics/MSE[mean] \\\n",
+ "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n",
+ "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n",
+ "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n",
+ "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n",
+ "\n",
+ " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n",
+ "0 453525.145918 347.991483 2.777295 \n",
+ "1 281.843068 12.531653 1.214064 \n",
+ "2 456.373289 15.667392 1.510286 \n",
+ "3 309.272222 13.635488 1.426054 \n",
+ "\n",
+ " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n",
+ "0 0.089373 0.091613 26.631225 \n",
+ "1 1.360590 1.138373 7.486931 \n",
+ "2 1.691291 1.402410 18.533654 \n",
+ "3 2.438311 0.916854 22.036198 \n",
+ "\n",
+ " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n",
+ "0 673.442756 0.122691 0.063399 \n",
+ "1 16.788182 0.904926 0.675488 \n",
+ "2 21.362895 1.293556 0.948684 \n",
+ "3 17.586137 1.074212 0.832895 \n",
+ "\n",
+ " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n",
+ "0 0.060870 Econ/Fin 1 \n",
+ "1 0.521168 Web/CloudOps 7 \n",
+ "2 0.904205 Web/CloudOps 7 \n",
+ "3 0.941065 Web/CloudOps 7 "
]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
}
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": [],
- "runtime_attributes": {
- "runtime_version": "2025.07"
- }
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.12"
- }
+ ],
+ "source": [
+ "official_eval_sn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "id": "OCifh_5D9B05"
+ },
+ "outputs": [],
+ "source": [
+ "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0wapKMgTFScM"
+ },
+ "source": [
+ "## Changelog\n",
+ "\n",
+ "### **2025-11-06**\n",
+ "\n",
+ "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n",
+ "\n",
+ "### **2025-08-05**\n",
+ "\n",
+ "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)."
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "runtime_attributes": {
+ "runtime_version": "2025.07"
+ }
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
},
- "nbformat": 4,
- "nbformat_minor": 0
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/docs/examples/google-llms.ipynb b/docs/examples/google-llms.ipynb
index 7abb7bc6..9beba8db 100644
--- a/docs/examples/google-llms.ipynb
+++ b/docs/examples/google-llms.ipynb
@@ -38,6 +38,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()\n",
"\n",
"from timecopilot import TimeCopilot\n",
@@ -168,7 +169,7 @@
"outputs": [],
"source": [
"tc = TimeCopilot(\n",
- " llm='google-gla:gemini-3-pro-preview',\n",
+ " llm=\"google-gla:gemini-3-pro-preview\",\n",
")"
]
},
@@ -191,8 +192,8 @@
"from pydantic_ai.models.google import GoogleModel\n",
"from pydantic_ai.providers.google import GoogleProvider\n",
"\n",
- "provider = GoogleProvider(api_key='your-api-key')\n",
- "google_model = GoogleModel('gemini-3-pro-preview', provider=provider)\n",
+ "provider = GoogleProvider(api_key=\"your-api-key\")\n",
+ "google_model = GoogleModel(\"gemini-3-pro-preview\", provider=provider)\n",
"tc = TimeCopilot(\n",
" llm=google_model,\n",
")"
diff --git a/docs/examples/llm-providers.ipynb b/docs/examples/llm-providers.ipynb
index bab596fa..00af2955 100644
--- a/docs/examples/llm-providers.ipynb
+++ b/docs/examples/llm-providers.ipynb
@@ -35,6 +35,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()"
]
},
@@ -46,7 +47,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "from timecopilot import TimeCopilot\n"
+ "from timecopilot import TimeCopilot"
]
},
{
@@ -276,10 +277,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(\n",
- " llm=\"openai:gpt-4o\",\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=\"openai:gpt-4o\", retries=3)"
]
},
{
@@ -365,10 +363,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tc = TimeCopilot(\n",
- " llm='ollama:gpt-oss:20b',\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=\"ollama:gpt-oss:20b\", retries=3)"
]
},
{
@@ -396,10 +391,7 @@
" provider=OllamaProvider(base_url=\"http://localhost:11434/v1\"),\n",
")\n",
"\n",
- "tc = TimeCopilot(\n",
- " llm=llm,\n",
- " retries=3\n",
- ")"
+ "tc = TimeCopilot(llm=llm, retries=3)"
]
},
{
diff --git a/docs/examples/sktime.ipynb b/docs/examples/sktime.ipynb
index 276e40b6..dc1ce7a9 100644
--- a/docs/examples/sktime.ipynb
+++ b/docs/examples/sktime.ipynb
@@ -26,6 +26,7 @@
"outputs": [],
"source": [
"import nest_asyncio\n",
+ "\n",
"nest_asyncio.apply()\n",
"\n",
"import timecopilot\n",
@@ -108,10 +109,7 @@
"model_list = timecopilot.agent.DEFAULT_MODELS.copy()\n",
"model_list.append(adapted_model)\n",
"\n",
- "tc = timecopilot.TimeCopilot(\n",
- " llm=\"openai:gpt-4o\",\n",
- " forecasters=model_list\n",
- ")"
+ "tc = timecopilot.TimeCopilot(llm=\"openai:gpt-4o\", forecasters=model_list)"
]
},
{
diff --git a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
index e0ad916f..f61767cd 100644
--- a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
+++ b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb
@@ -132,7 +132,7 @@
" \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n",
" parse_dates=[\"ds\"],\n",
")\n",
- "df.head()\n"
+ "df.head()"
]
},
{
@@ -202,14 +202,13 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"tcf = TimeCopilotForecaster(\n",
" models=[\n",
" AutoARIMA(),\n",
" Chronos(repo_id=\"amazon/chronos-bolt-small\"),\n",
- " Moirai(), \n",
- " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"), \n",
- " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"), \n",
+ " Moirai(),\n",
+ " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"),\n",
+ " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"),\n",
" SeasonalNaive(),\n",
" ]\n",
")"
@@ -236,8 +235,14 @@
"metadata": {},
"outputs": [],
"source": [
- "level = [0, 20, 40, 60, 80] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
- "cv_df = tcf.cross_validation(df=df, h=12, level=level) "
+ "level = [\n",
+ " 0,\n",
+ " 20,\n",
+ " 40,\n",
+ " 60,\n",
+ " 80,\n",
+ "] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n",
+ "cv_df = tcf.cross_validation(df=df, h=12, level=level)"
]
},
{
@@ -605,12 +610,14 @@
],
"source": [
"eval_df = evaluate(\n",
- " cv_df.drop(columns=[\"cutoff\"]), \n",
- " train_df=df.query(\"ds <= '2024-08-31'\"), \n",
+ " cv_df.drop(columns=[\"cutoff\"]),\n",
+ " train_df=df.query(\"ds <= '2024-08-31'\"),\n",
" metrics=[partial(mase, seasonality=12), scaled_crps],\n",
" level=level,\n",
")\n",
- "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)"
+ "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n",
+ " 3\n",
+ ")"
]
}
],
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 1fb457e2..ab057cb7 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -9,9 +9,7 @@
def build_stub_llm(output: dict) -> FunctionModel: # noqa: D401
- def _response_fn(
- messages: list[ModelMessage], info: AgentInfo
- ) -> ModelResponse: # noqa: D401
+ def _response_fn(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: # noqa: D401
payload = json.dumps(output)
return ModelResponse(
parts=[ToolCallPart(tool_name="final_result", args=payload)]
From 7f6ab3a85e79da44ea87b24bf11c1d17d35ada81 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Mon, 19 Jan 2026 13:22:10 +0530
Subject: [PATCH 16/21] re-add time_col: str = "ds" (removing it was a bad idea)
---
timecopilot/utils/experiment_handler.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index cb8b2a3d..b32da0fc 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -37,6 +37,7 @@ def mase(
seasonality: int,
train_df: pd.DataFrame,
id_col: str = "unique_id",
+ time_col: str = "ds",
target_col: str = "y",
cutoff_col: str = "cutoff",
) -> pd.DataFrame:
From bd51e0085e8e21270fc738c711c9acbb2f6b4fb6 Mon Sep 17 00:00:00 2001
From: Kushagra Srivastav <76401345+Kushagra7777@users.noreply.github.com>
Date: Tue, 27 Jan 2026 01:25:47 +0530
Subject: [PATCH 17/21] adding time_col for sorting
---
timecopilot/utils/experiment_handler.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index b32da0fc..89a7bcf0 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -52,7 +52,8 @@ def mase(
if cutoff_col in mean_abs_err.columns:
cutoff = mean_abs_err[cutoff_col]
mean_abs_err = mean_abs_err.drop(columns=[cutoff_col])
-
+
+ train_df = train_df.sort_values([id_col, time_col])
lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
scale = train_df[target_col].sub(lagged).abs()
scale = scale.groupby(train_df[id_col], observed=True).mean()
From 820794adc7bf7a010ba46298c47c28e2bb9d635a Mon Sep 17 00:00:00 2001
From: AzulGarza
Date: Mon, 26 Jan 2026 14:00:09 -0800
Subject: [PATCH 18/21] chore: add newest version of utilsforecast
---
pyproject.toml | 2 +-
uv.lock | 11 ++++++-----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 0e5769f5..c70b8ab6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,7 +72,7 @@ dependencies = [
"transformers==4.40.1 ; python_full_version < '3.13'",
"transformers>=4.48,<5 ; python_full_version >= '3.13'",
"tsfeatures",
- "utilsforecast[plotting]",
+ "utilsforecast[plotting]>=0.2.15",
]
description = "The GenAI Forecasting Agent · LLMs × Time Series Foundation Models"
license = "MIT"
diff --git a/uv.lock b/uv.lock
index 39bbcae4..4f4ff21b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.10"
resolution-markers = [
"python_full_version >= '3.14' and sys_platform == 'linux'",
@@ -7373,7 +7373,7 @@ requires-dist = [
{ name = "transformers", marker = "python_full_version < '3.13'", specifier = "==4.40.1" },
{ name = "transformers", marker = "python_full_version >= '3.13'", specifier = ">=4.48,<5" },
{ name = "tsfeatures" },
- { name = "utilsforecast", extras = ["plotting"] },
+ { name = "utilsforecast", extras = ["plotting"], specifier = ">=0.2.15" },
]
[package.metadata.requires-dev]
@@ -8176,18 +8176,19 @@ wheels = [
[[package]]
name = "utilsforecast"
-version = "0.2.12"
+version = "0.2.15"
source = { registry = "https://pypi.org/simple" }
dependencies = [
+ { name = "narwhals" },
{ name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
{ name = "numpy", version = "2.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
{ name = "packaging" },
{ name = "pandas", version = "2.1.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
{ name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/48/d9/21e43a7419f0356043b52f72cd0262dd497f087fca78e90aebf8201a2339/utilsforecast-0.2.12.tar.gz", hash = "sha256:73f9dfd836a721a95c349f784bd75e18a4cb7c1469800e325414e22901e9775b", size = 41524, upload-time = "2025-02-24T19:41:56.25Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d9/55/8a37bb9ce36541fd353466259a07ccfdfaf25c996f3a71d989af4d4c7ba4/utilsforecast-0.2.15.tar.gz", hash = "sha256:c36d65d698a88d0fadc93d2d6737c304c3776397c60ae551ee17aa678caf3659", size = 60609, upload-time = "2025-12-03T16:29:08.652Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/75/9b/f6336ce71f4e6ed32877309314f549192cd6b982ad6d96fd8b1b5a230870/utilsforecast-0.2.12-py3-none-any.whl", hash = "sha256:acfba80bbf44e18433c206194f3ddd89cc28ff03aa0ba744b8040795a40b7b3f", size = 42219, upload-time = "2025-02-24T19:41:53.795Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/11/6c6ee61958b8e60f634b39e2f9a004f5d1c479cb962a2001fc3c72ceed78/utilsforecast-0.2.15-py3-none-any.whl", hash = "sha256:4b43bf5107e3cba13604cd86e93b5cf4906b57105b1900ccf98b8978aabd4150", size = 40344, upload-time = "2025-12-03T16:29:07.144Z" },
]
[package.optional-dependencies]
From 726da087459fa48716f3d2b1b64109f942b1fd91 Mon Sep 17 00:00:00 2001
From: AzulGarza
Date: Mon, 26 Jan 2026 14:00:27 -0800
Subject: [PATCH 19/21] fix: add compat with newest version utilsforecast
---
timecopilot/utils/experiment_handler.py | 81 +------------------------
1 file changed, 3 insertions(+), 78 deletions(-)
diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py
index 89a7bcf0..5e9bc4cb 100644
--- a/timecopilot/utils/experiment_handler.py
+++ b/timecopilot/utils/experiment_handler.py
@@ -5,13 +5,12 @@
from pathlib import Path
from typing import Any
-import numpy as np
import pandas as pd
from pydantic import BaseModel, Field
from pydantic_ai import Agent
from pydantic_ai.agent import AgentRunResult
from utilsforecast.evaluation import evaluate
-from utilsforecast.losses import mae
+from utilsforecast.losses import mase
from ..models.utils.forecaster import (
get_seasonality,
@@ -25,64 +24,6 @@
)
-def _zero_to_nan_pd(s: pd.Series) -> pd.Series:
- s = s.astype(float)
- s[s == 0] = np.nan
- return s
-
-
-def mase(
- df: pd.DataFrame,
- models: list[str],
- seasonality: int,
- train_df: pd.DataFrame,
- id_col: str = "unique_id",
- time_col: str = "ds",
- target_col: str = "y",
- cutoff_col: str = "cutoff",
-) -> pd.DataFrame:
- mean_abs_err = mae(
- df,
- models,
- id_col=id_col,
- target_col=target_col,
- ).set_index(id_col)
-
- cutoff = None
- if cutoff_col in mean_abs_err.columns:
- cutoff = mean_abs_err[cutoff_col]
- mean_abs_err = mean_abs_err.drop(columns=[cutoff_col])
-
- train_df = train_df.sort_values([id_col, time_col])
- lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
- scale = train_df[target_col].sub(lagged).abs()
- scale = scale.groupby(train_df[id_col], observed=True).mean()
- scale[scale < 1e-2] = 0.0
-
- scale = _zero_to_nan_pd(scale).reindex(mean_abs_err.index)
- res = mean_abs_err.div(scale, axis=0).fillna(0)
-
- if cutoff is not None:
- res.insert(0, cutoff_col, cutoff)
-
- res.index.name = id_col
- return res.reset_index()
-
-
-def generate_train_cv_splits(
- df: pd.DataFrame,
- cutoffs: pd.DataFrame,
-) -> pd.DataFrame:
- """
- based on `cutoffs` (columns `unique_id`, `cutoffs`)
- generates train cv splits using `df`
- """
- df = df.merge(cutoffs, on="unique_id", how="outer")
- df = df.query("ds <= cutoff")
- df = df.reset_index(drop=True)
- return df
-
-
class DatasetParams(BaseModel):
# TODO: make these required
freq: str | None = Field(description="The frequency of the data", default=None)
@@ -148,7 +89,7 @@ def read_df(path: str | Path) -> pd.DataFrame:
if suffix in {"csv", "txt"}:
df = read_fn(io.StringIO(resp.text)) # type: ignore[arg-type]
elif suffix in {"parquet"}:
- import pyarrow as pa
+ import pyarrow as pa # noqa: WPS433
table = pa.ipc.open_file(pa.BufferReader(resp.content)).read_all()
df = table.to_pandas()
@@ -252,30 +193,14 @@ def evaluate_forecast_df(
if forecast_df[model].isna().sum() > 0:
print(forecast_df.loc[forecast_df[model].isna()]["unique_id"].unique())
raise ValueError(f"model {model} has NaN values")
- cutoffs = forecast_df[["unique_id", "cutoff"]].drop_duplicates()
- train_cv_splits = generate_train_cv_splits(df=self.df, cutoffs=cutoffs)
-
- def add_id_cutoff(df: pd.DataFrame):
- df["id_cutoff"] = (
- df["unique_id"].astype(str) + "-" + df["cutoff"].astype(str)
- )
- for df in [cutoffs, train_cv_splits, forecast_df]:
- add_id_cutoff(df)
partial_mase = partial(mase, seasonality=self.seasonality)
eval_df = evaluate(
df=forecast_df,
- train_df=train_cv_splits,
+ train_df=self.df,
metrics=[partial_mase],
models=models,
- id_col="id_cutoff",
)
- if "cutoff" not in eval_df.columns and "id_cutoff" in eval_df.columns:
- eval_df = eval_df.merge(cutoffs, on="id_cutoff", how="left")
-
- cols = ["unique_id", "cutoff", "metric"] + models
- cols = [c for c in cols if c in eval_df.columns]
- eval_df = eval_df[cols]
return eval_df
From 452f5a2e6226fc485f953aa109d734782ee92121 Mon Sep 17 00:00:00 2001
From: AzulGarza
Date: Mon, 26 Jan 2026 14:08:53 -0800
Subject: [PATCH 20/21] fix: update experiment handler tests with mase updates
---
tests/utils/test_experiment_handler.py | 31 --------------------------
1 file changed, 31 deletions(-)
diff --git a/tests/utils/test_experiment_handler.py b/tests/utils/test_experiment_handler.py
index caccf4f3..1ec4d429 100644
--- a/tests/utils/test_experiment_handler.py
+++ b/tests/utils/test_experiment_handler.py
@@ -24,7 +24,6 @@
from timecopilot.utils.experiment_handler import (
ExperimentDataset,
ExperimentDatasetParser,
- generate_train_cv_splits,
mase,
)
@@ -233,36 +232,6 @@ def test_parse_params_no_query_infers_all():
)
-@pytest.mark.parametrize(
- "freq,n_windows,h,step_size",
- [
- ("H", 3, 2, 1),
- ("H", 1, 12, None),
- ("MS", 3, 2, 2),
- ],
-)
-def test_generate_train_cv_splits(freq, n_windows, h, step_size):
- df = generate_series(n_series=5, freq=freq)
- df["unique_id"] = df["unique_id"].astype(int)
- df_cv = generate_train_cv_splits_from_backtest_splits(
- df=df,
- n_windows=n_windows,
- step_size=step_size,
- h=h,
- freq=freq,
- )
- cutoffs = df_cv[["unique_id", "cutoff"]].drop_duplicates()
- train_cv_splits = generate_train_cv_splits(
- df=df,
- cutoffs=cutoffs,
- )
- p_sort_df = partial(sort_df, cols=["unique_id", "cutoff", "ds"])
- pd.testing.assert_frame_equal(
- p_sort_df(df_cv),
- p_sort_df(train_cv_splits),
- )
-
-
@pytest.mark.parametrize("model", models)
def test_eval(model):
freq = "H"
From 95deb756357b9cbb387d19d396706d46bb9d502d Mon Sep 17 00:00:00 2001
From: AzulGarza
Date: Mon, 26 Jan 2026 15:56:31 -0800
Subject: [PATCH 21/21] fix: import directly from utilsforecast
---
tests/utils/test_experiment_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/utils/test_experiment_handler.py b/tests/utils/test_experiment_handler.py
index 1ec4d429..5340585d 100644
--- a/tests/utils/test_experiment_handler.py
+++ b/tests/utils/test_experiment_handler.py
@@ -7,6 +7,7 @@
from pydantic_ai.models.function import AgentInfo, FunctionModel
from utilsforecast.data import generate_series
from utilsforecast.evaluation import evaluate
+from utilsforecast.losses import mase
from utilsforecast.processing import (
backtest_splits,
drop_index_if_pandas,
@@ -24,7 +25,6 @@
from timecopilot.utils.experiment_handler import (
ExperimentDataset,
ExperimentDatasetParser,
- mase,
)