diff --git a/docs/examples/agent-quickstart.ipynb b/docs/examples/agent-quickstart.ipynb index 92114456..e1f77b63 100644 --- a/docs/examples/agent-quickstart.ipynb +++ b/docs/examples/agent-quickstart.ipynb @@ -16,6 +16,7 @@ "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()" ] }, @@ -157,11 +158,10 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "tc = TimeCopilot(\n", " llm=\"openai:gpt-4o\",\n", " retries=3,\n", - ")\n" + ")" ] }, { diff --git a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb index 18f6134a..ee080351 100644 --- a/docs/examples/anomaly-detection-forecaster-quickstart.ipynb +++ b/docs/examples/anomaly-detection-forecaster-quickstart.ipynb @@ -183,7 +183,7 @@ " \"https://timecopilot.s3.amazonaws.com/public/data/the_anomaly_tour.csv\",\n", " parse_dates=[\"ds\"],\n", ")\n", - "df\n" + "df" ] }, { @@ -222,7 +222,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "tcf = TimeCopilotForecaster(\n", " models=[\n", " Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n", diff --git a/docs/examples/aws-bedrock.ipynb b/docs/examples/aws-bedrock.ipynb index 52f44523..9b691e08 100644 --- a/docs/examples/aws-bedrock.ipynb +++ b/docs/examples/aws-bedrock.ipynb @@ -37,6 +37,7 @@ "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()\n", "\n", "from timecopilot import TimeCopilot\n", @@ -164,7 +165,7 @@ "outputs": [], "source": [ "tc = TimeCopilot(\n", - " llm='bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',\n", + " llm=\"bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0\",\n", ")" ] }, @@ -187,9 +188,7 @@ "source": [ "from pydantic_ai.models.bedrock import BedrockConverseModel\n", "\n", - "model = BedrockConverseModel(\n", - " 'us.anthropic.claude-3-5-sonnet-20241022-v2:0'\n", - ")\n", + "model = BedrockConverseModel(\"us.anthropic.claude-3-5-sonnet-20241022-v2:0\")\n", "tc = TimeCopilot(\n", " llm=model,\n", ")" @@ 
-219,7 +218,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")\n" + "df = pd.read_csv(\"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\")" ] }, { diff --git a/docs/examples/chronos-family.ipynb b/docs/examples/chronos-family.ipynb index f9bb2021..84174108 100644 --- a/docs/examples/chronos-family.ipynb +++ b/docs/examples/chronos-family.ipynb @@ -132,7 +132,7 @@ " \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n", " parse_dates=[\"ds\"],\n", ")\n", - "df.head()\n" + "df.head()" ] }, { @@ -241,7 +241,7 @@ "outputs": [], "source": [ "level = [20, 40, 60, 80]\n", - "cv_df = tcf.cross_validation(df=df, h=12, level=level) " + "cv_df = tcf.cross_validation(df=df, h=12, level=level)" ] }, { @@ -638,12 +638,14 @@ ], "source": [ "eval_df = evaluate(\n", - " cv_df.drop(columns=[\"cutoff\"]), \n", - " train_df=df.query(\"ds <= '2024-08-31'\"), \n", + " cv_df.drop(columns=[\"cutoff\"]),\n", + " train_df=df.query(\"ds <= '2024-08-31'\"),\n", " metrics=[partial(mase, seasonality=12), scaled_crps],\n", " level=level,\n", ")\n", - "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)" + "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n", + " 3\n", + ")" ] } ], diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb index 4d4a39a0..e54bf0ef 100644 --- a/docs/examples/cryptocurrency-quickstart.ipynb +++ b/docs/examples/cryptocurrency-quickstart.ipynb @@ -63,7 +63,7 @@ "outputs": [], "source": [ "files = os.listdir(path)\n", - "files = [path+'/'+x for x in files]" + "files = [path + \"/\" + x for x in files]" ] }, { @@ -198,18 +198,18 @@ "# Read all filez and set them up to the readable structure for timecopilot\n", "for file in files:\n", " temp_df = pd.read_csv(file)\n", - " temp_df = 
temp_df[['Symbol','Date','Close']]\n", - " temp_df.columns = ['unique_id','ds','y']\n", - " big_df = pd.concat([big_df,temp_df])\n", + " temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n", + " temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n", + " big_df = pd.concat([big_df, temp_df])\n", "\n", "big_df = big_df.reset_index(drop=True)\n", "big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n", "\n", - "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. \n", + "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n", "# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n", - "cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n", - "big_df=big_df[big_df.unique_id.isin(cryptos)]\n", - "big_df=big_df.reset_index(drop=True)\n", + "cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n", + "big_df = big_df[big_df.unique_id.isin(cryptos)]\n", + "big_df = big_df.reset_index(drop=True)\n", "big_df" ] }, @@ -341,6 +341,7 @@ " df_out.loc[idx, col] = np.nan\n", " return df_out\n", "\n", + "\n", "df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n", "df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n", "print(df_missing)" @@ -709,12 +710,14 @@ } ], "source": [ - "anomaly_summary_xlm=anomalies_df[\n", + "anomaly_summary_xlm = anomalies_df[\n", " # (anomalies_df.unique_id=='SOL') & \\\n", - " ((anomalies_df['Chronos-anomaly']==True) | \\\n", - " (anomalies_df['SeasonalNaive-anomaly']==True) |\n", - " (anomalies_df['Theta-anomaly']==True)\n", - " )].reset_index(drop=True)\n", + " (\n", + " (anomalies_df[\"Chronos-anomaly\"] == True)\n", + " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", + " | (anomalies_df[\"Theta-anomaly\"] == True)\n", + " )\n", + "].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ 
-954,12 +957,14 @@ } ], "source": [ - "anomaly_summary_xlm=anomalies_df[\n", - " (anomalies_df.unique_id=='ADA') & \\\n", - " ((anomalies_df['Chronos-anomaly']==True) | \\\n", - " (anomalies_df['SeasonalNaive-anomaly']==True) |\n", - " (anomalies_df['Theta-anomaly']==True)\n", - " )].reset_index(drop=True)\n", + "anomaly_summary_xlm = anomalies_df[\n", + " (anomalies_df.unique_id == \"ADA\")\n", + " & (\n", + " (anomalies_df[\"Chronos-anomaly\"] == True)\n", + " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", + " | (anomalies_df[\"Theta-anomaly\"] == True)\n", + " )\n", + "].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ -1199,12 +1204,14 @@ } ], "source": [ - "anomaly_summary_xlm=anomalies_df[\n", - " (anomalies_df.unique_id=='ADA') & \\\n", - " ((anomalies_df['Chronos-anomaly']==True) & \\\n", - " (anomalies_df['SeasonalNaive-anomaly']==True) \\\n", - " # (anomalies_df['Theta-anomaly']==True)\n", - " )].reset_index(drop=True)\n", + "anomaly_summary_xlm = anomalies_df[\n", + " (anomalies_df.unique_id == \"ADA\")\n", + " & (\n", + " (anomalies_df[\"Chronos-anomaly\"] == True)\n", + " & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", + " # (anomalies_df['Theta-anomaly']==True)\n", + " )\n", + "].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ -1241,12 +1248,12 @@ "source": [ "tcf1 = TimeCopilotForecaster(\n", " models=[\n", - " AutoARIMA(), \n", + " AutoARIMA(),\n", " Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n", " Theta(),\n", - " AutoETS(), \n", - " Moirai(), \n", - " Prophet(), \n", + " AutoETS(),\n", + " Moirai(),\n", + " Prophet(),\n", " SeasonalNaive(),\n", " ]\n", ")" @@ -1259,7 +1266,7 @@ "metadata": {}, "outputs": [], "source": [ - "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])" + "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])" ] }, { @@ -1303,9 +1310,9 @@ "metadata": {}, "outputs": [], "source": [ - "eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n", - " 
[['unique_id','ds','Chronos','Chronos-lo-80']]\\\n", - " .reset_index(drop=True)" + "eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n", + " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n", + "].reset_index(drop=True)" ] }, { @@ -1345,9 +1352,9 @@ "metadata": {}, "outputs": [], "source": [ - "eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n", - " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n", - " .reset_index(drop=True)" + "eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n", + " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n", + "].reset_index(drop=True)" ] }, { @@ -1515,9 +1522,9 @@ } ], "source": [ - "compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n", - "compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n", - "print(compare['dif'].sum())" + "compare = eth_fcst_normal.merge(eth_fcst_missing, on=[\"ds\", \"unique_id\"])\n", + "compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n", + "print(compare[\"dif\"].sum())" ] }, { diff --git a/docs/examples/forecaster-quickstart.ipynb b/docs/examples/forecaster-quickstart.ipynb index 6239177a..88b27704 100644 --- a/docs/examples/forecaster-quickstart.ipynb +++ b/docs/examples/forecaster-quickstart.ipynb @@ -131,7 +131,7 @@ " \"https://timecopilot.s3.amazonaws.com/public/data/air_passengers.csv\",\n", " parse_dates=[\"ds\"],\n", ")\n", - "df.head()\n" + "df.head()" ] }, { @@ -170,13 +170,12 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "tcf = TimeCopilotForecaster(\n", " models=[\n", - " AutoARIMA(), \n", - " AutoETS(), \n", - " Moirai(), \n", - " Prophet(), \n", + " AutoARIMA(),\n", + " AutoETS(),\n", + " Moirai(),\n", + " Prophet(),\n", " SeasonalNaive(),\n", " ]\n", ")" diff --git a/docs/examples/gift-eval.ipynb b/docs/examples/gift-eval.ipynb index 70347b4d..7b499323 100644 --- a/docs/examples/gift-eval.ipynb +++ b/docs/examples/gift-eval.ipynb @@ -1,1782 +1,1784 @@ { - "cells": [ - { - "cell_type": 
"markdown", - "metadata": { - "id": "azZ6BczQLj_B" - }, - "source": [ - "# Foundation Model Ensemble (GIFT-Eval)\n", - "\n", - "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n", - "\n", - "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n", - "\n", - "\n", - "\n", - "## Model Description\n", - "\n", - "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n", - "\n", - "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n", - "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n", - "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n", - "\n", - "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n", - "\n", - "## TimeCopilot's Key Features\n", - "\n", - "- [**Foundation model integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n", - "- **Ensemble capabilities**: Built-in ensemble methods\n", - "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n", - "- **Dependency management**: Handles complex model requirements 
automatically\n", - "- **GPU efficiency**: Optimized memory sharing and multi‑model execution" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M2SumVjnLj_C" - }, - "source": [ - "## Requirements and Installation\n", - "\n", - "Install TimeCopilot library:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "yQpa1NOvLj_D" - }, - "outputs": [], - "source": [ - "%pip install \"timecopilot>=0.0.22\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tVsga7ogLj_D" - }, - "source": [ - "## Dataset Setup\n", - "\n", - "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "mriqHxfOLj_D" - }, - "outputs": [], - "source": [ - "from timecopilot.gift_eval.eval import GIFTEval\n", - "\n", - "# TimeCopilot's built-in GIFT-Eval dataset downloader\n", - "# Handles the complete benchmark dataset with all 97 configurations\n", - "storage_path = \"./data/gift-eval\"\n", - "GIFTEval.download_data(storage_path=storage_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-SHX7gAtLj_D" - }, - "source": [ - "## Model Implementation\n", - "\n", - "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iWYKncn03jVy" - }, - "outputs": [], - "source": [ - "from timecopilot.models.ensembles.median import MedianEnsemble\n", - "from timecopilot.models.foundation.chronos import Chronos\n", - "from timecopilot.models.foundation.timesfm import TimesFM\n", - "from timecopilot.models.foundation.tirex import TiRex\n", - "from timecopilot.models.utils.forecaster import 
Forecaster\n", - "\n", - "batch_size = 64\n", - "\n", - "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n", - "# Automatically handles dependency conflicts and GPU memory management\n", - "ensemble = MedianEnsemble(\n", - " models=[\n", - " # Each model uses TimeCopilot's unified interface despite different architectures\n", - " Chronos(\n", - " repo_id=\"amazon/chronos-2\",\n", - " batch_size=batch_size,\n", - " ),\n", - " TimesFM(\n", - " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n", - " batch_size=batch_size,\n", - " ),\n", - " TiRex(\n", - " batch_size=batch_size,\n", - " ),\n", - " ],\n", - " alias=\"TimeCopilot\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sCjZScu5Lj_E" - }, - "source": [ - "## Evaluation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yPKpn4e04KZD" - }, - "source": [ - "### Defining the evaluator" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M2YcjoDF5NH7" - }, - "source": [ - "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "RMvE9Cx9Lj_D" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from timecopilot.gift_eval.eval import GIFTEval\n", - "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n", - "\n", - "\n", - "def evaluate_forecaster(\n", - " forecaster: Forecaster,\n", - " dataset_name: str,\n", - " term: str,\n", - " output_path: str,\n", - " storage_path: str,\n", - " ):\n", - " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n", - "\n", - " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n", - " gifteval = GIFTEval(\n", - " dataset_name=dataset_name,\n", - " term=term,\n", - " output_path=output_path,\n", - " storage_path=storage_path,\n", - " )\n", - "\n", - " # GluonTS wrapper for GIFT-Eval compatibility\n", - " # It can receive any Forecaster from TimeCopilot\n", - " predictor = GluonTSPredictor(\n", - " forecaster=forecaster,\n", - " max_length=4_096,\n", - " batch_size=1_024,\n", - " )\n", - "\n", - " # Run evaluation with GIFT-Eval's standardized metrics\n", - " gifteval.evaluate_predictor(predictor, batch_size=512)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ajc2VPQl5cPY" - }, - "source": [ - "### Performing evaluation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "74XuerNA5rWU" - }, - "source": [ - "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "R41M3rDeLj_E" - }, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "\n", - "if torch.cuda.is_available(): # remove if you want to run on CPU\n", - " combinations = [\n", - " (\"m4_weekly\", \"short\"),\n", - " (\"bizitobs_l2c/H\", \"short\"),\n", - " (\"bizitobs_l2c/H\", \"medium\"),\n", - " (\"bizitobs_l2c/H\", \"long\"),\n", - " ]\n", - "\n", - " for dataset_name, term in combinations:\n", - " evaluate_forecaster(\n", - " forecaster=ensemble,\n", - " dataset_name=dataset_name,\n", - " term=term,\n", - " output_path=f\"./results/timecopilot\",\n", - " storage_path=storage_path,\n", - " )\n", - "\n", - " # Load consolidated results in GIFT-Eval format\n", - " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 195 - }, - "id": "cQ7WOUKCR_4h", - "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n \"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": 
\"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "eval_df" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortTimeCopilot220437.833920220437.833920239.9034381.9166610.0586170.05829214.666592469.5080770.0855370.0437070.034997Econ/Fin1
1bizitobs_l2c/H/shortTimeCopilot54.62852254.6285224.4590380.4444250.3856570.5800572.6962517.3911110.3984000.2403540.186401Web/CloudOps7
2bizitobs_l2c/H/mediumTimeCopilot71.80087771.8008774.8516400.4886320.4707140.7579923.3741628.4735400.5130860.2937740.232035Web/CloudOps7
3bizitobs_l2c/H/longTimeCopilot83.78648383.7864835.3405950.5669970.6193690.7828124.5851229.1534960.5591220.3262190.261100Web/CloudOps7
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " dataset model eval_metrics/MSE[mean] \\\n", - "0 m4_weekly/W/short TimeCopilot 220437.833920 \n", - "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n", - "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n", - "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n", - "\n", - " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", - "0 220437.833920 239.903438 1.916661 \n", - "1 54.628522 4.459038 0.444425 \n", - "2 71.800877 4.851640 0.488632 \n", - "3 83.786483 5.340595 0.566997 \n", - "\n", - " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", - "0 0.058617 0.058292 14.666592 \n", - "1 0.385657 0.580057 2.696251 \n", - "2 0.470714 0.757992 3.374162 \n", - "3 0.619369 0.782812 4.585122 \n", - "\n", - " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", - "0 469.508077 0.085537 0.043707 \n", - "1 7.391111 0.398400 0.240354 \n", - "2 8.473540 0.513086 0.293774 \n", - "3 9.153496 0.559122 0.326219 \n", - "\n", - " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", - "0 0.034997 Econ/Fin 1 \n", - "1 0.186401 Web/CloudOps 7 \n", - "2 0.232035 Web/CloudOps 7 \n", - "3 0.261100 Web/CloudOps 7 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "if torch.cuda.is_available():\n", - " from IPython.display import display\n", - "\n", - " display(eval_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7JCiHenv6Dma" - }, - "source": [ - "You can access the complete combination of datasets with the following:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "RmmMtHpA6HIu" - }, - "outputs": [], - "source": [ - "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2WBJ-wjv6Kz6", - "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0" - }, 
- "outputs": [ - { - "data": { - "text/plain": [ - "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "DATASETS_WITH_TERMS[:3]" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "azZ6BczQLj_B" + }, + "source": [ + "# Foundation Model Ensemble (GIFT-Eval)\n", + "\n", + "This notebook demonstrates the evaluation of a foundation model ensemble built using the [TimeCopilot](https://timecopilot.dev) library on the [GIFT-Eval](https://huggingface.co/spaces/Salesforce/GIFT-Eval) benchmark.\n", + "\n", + "TimeCopilot is an open‑source AI agent for time series forecasting that provides a unified interface to multiple forecasting approaches, from foundation models to classical statistical, machine learning, and deep learning methods, along with built‑in ensemble capabilities for robust and explainable forecasting.\n", + "\n", + "\n", + "\n", + "## Model Description\n", + "\n", + "This ensemble leverages [**TimeCopilot's MedianEnsemble**](https://timecopilot.dev/api/models/ensembles/#timecopilot.models.ensembles.median.MedianEnsemble) feature, which combines three state-of-the-art foundation models:\n", + "\n", + "- [**Chronos-2** (AWS)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.chronos.Chronos).\n", + "- [**TimesFM-2.5** (Google Research)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.timesfm.TimesFM).\n", + "- [**TiRex** (NXAI)](https://timecopilot.dev/api/models/foundation/models/#timecopilot.models.foundation.tirex.TiRex).\n", + "\n", + "The ensemble uses **median aggregation with isotonic regression** to ensure monotonic quantiles for probabilistic forecasting, providing robustness against outliers and model-specific biases.\n", + "\n", + "## TimeCopilot's Key Features\n", + "\n", + "- [**Foundation model 
integration**](https://timecopilot.dev/model-hub/): Unified API for 30+ state‑of‑the‑art foundation models\n", + "- **Ensemble capabilities**: Built-in ensemble methods\n", + "- **Zero-shot capability**: Leverages pretrained foundation models out‑of‑the‑box\n", + "- **Dependency management**: Handles complex model requirements automatically\n", + "- **GPU efficiency**: Optimized memory sharing and multi‑model execution" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M2SumVjnLj_C" + }, + "source": [ + "## Requirements and Installation\n", + "\n", + "Install TimeCopilot library:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "yQpa1NOvLj_D" + }, + "outputs": [], + "source": [ + "%pip install \"timecopilot>=0.0.22\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tVsga7ogLj_D" + }, + "source": [ + "## Dataset Setup\n", + "\n", + "TimeCopilot includes built-in [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) for dataset handling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "mriqHxfOLj_D" + }, + "outputs": [], + "source": [ + "from timecopilot.gift_eval.eval import GIFTEval\n", + "\n", + "# TimeCopilot's built-in GIFT-Eval dataset downloader\n", + "# Handles the complete benchmark dataset with all 97 configurations\n", + "storage_path = \"./data/gift-eval\"\n", + "GIFTEval.download_data(storage_path=storage_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-SHX7gAtLj_D" + }, + "source": [ + "## Model Implementation\n", + "\n", + "Using TimeCopilot's [model hub](https://timecopilot.dev/model-hub/) and [ensemble capabilities](https://timecopilot.dev/api/models/ensembles/) to create a foundation model ensemble:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iWYKncn03jVy" + }, + "outputs": 
[], + "source": [ + "from timecopilot.models.ensembles.median import MedianEnsemble\n", + "from timecopilot.models.foundation.chronos import Chronos\n", + "from timecopilot.models.foundation.timesfm import TimesFM\n", + "from timecopilot.models.foundation.tirex import TiRex\n", + "from timecopilot.models.utils.forecaster import Forecaster\n", + "\n", + "batch_size = 64\n", + "\n", + "# TimeCopilot's MedianEnsemble with isotonic regression for robust forecasting\n", + "# Automatically handles dependency conflicts and GPU memory management\n", + "ensemble = MedianEnsemble(\n", + " models=[\n", + " # Each model uses TimeCopilot's unified interface despite different architectures\n", + " Chronos(\n", + " repo_id=\"amazon/chronos-2\",\n", + " batch_size=batch_size,\n", + " ),\n", + " TimesFM(\n", + " repo_id=\"google/timesfm-2.5-200m-pytorch\",\n", + " batch_size=batch_size,\n", + " ),\n", + " TiRex(\n", + " batch_size=batch_size,\n", + " ),\n", + " ],\n", + " alias=\"TimeCopilot\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sCjZScu5Lj_E" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yPKpn4e04KZD" + }, + "source": [ + "### Defining the evaluator" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M2YcjoDF5NH7" + }, + "source": [ + "With TimeCopilot you can evaluate any [Forecaster](https://timecopilot.dev/api/models/utils/forecaster/#timecopilot.models.utils.forecaster.Forecaster) in a standardized way using its [GIFT-Eval](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) integration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "RMvE9Cx9Lj_D" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from timecopilot.gift_eval.eval import GIFTEval\n", + "from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor\n", + "\n", + "\n", + "def evaluate_forecaster(\n", + " forecaster: Forecaster,\n", + " dataset_name: str,\n", + " term: str,\n", + " output_path: str,\n", + " storage_path: str,\n", + "):\n", + " \"\"\"Evaluate a forecaster on a GIFT-Eval dataset defined by dataset name and term.\"\"\"\n", + "\n", + " # TimeCopilot's GIFT-Eval loader handles dataset preprocessing automatically\n", + " gifteval = GIFTEval(\n", + " dataset_name=dataset_name,\n", + " term=term,\n", + " output_path=output_path,\n", + " storage_path=storage_path,\n", + " )\n", + "\n", + " # GluonTS wrapper for GIFT-Eval compatibility\n", + " # It can receive any Forecaster from TimeCopilot\n", + " predictor = GluonTSPredictor(\n", + " forecaster=forecaster,\n", + " max_length=4_096,\n", + " batch_size=1_024,\n", + " )\n", + "\n", + " # Run evaluation with GIFT-Eval's standardized metrics\n", + " gifteval.evaluate_predictor(predictor, batch_size=512)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ajc2VPQl5cPY" + }, + "source": [ + "### Performing evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "74XuerNA5rWU" + }, + "source": [ + "In the GIFT-Eval benchmark, each dataset is defined by a combination of a dataset name and its term (short, medium or long)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "R41M3rDeLj_E" + }, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "\n", + "if torch.cuda.is_available(): # remove if you want to run on CPU\n", + " combinations = [\n", + " (\"m4_weekly\", \"short\"),\n", + " (\"bizitobs_l2c/H\", \"short\"),\n", + " (\"bizitobs_l2c/H\", \"medium\"),\n", + " (\"bizitobs_l2c/H\", \"long\"),\n", + " ]\n", + "\n", + " for dataset_name, term in combinations:\n", + " evaluate_forecaster(\n", + " forecaster=ensemble,\n", + " dataset_name=dataset_name,\n", + " term=term,\n", + " output_path=f\"./results/timecopilot\",\n", + " storage_path=storage_path,\n", + " )\n", + "\n", + " # Load consolidated results in GIFT-Eval format\n", + " eval_df = pd.read_csv(\"./results/timecopilot/all_results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 }, + "id": "cQ7WOUKCR_4h", + "outputId": "62f5b585-0192-4ab2-94f2-3c756759c661" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7I9OQThW6OD8", - "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"eval_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"TimeCopilot\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": 
\"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 110183.88162948907,\n \"min\": 54.628521701648495,\n \"max\": 220437.8339198133,\n \"num_unique_values\": 4,\n \"samples\": [\n 54.628521701648495\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117.5103935731355,\n \"min\": 4.459037998423877,\n \"max\": 239.90343810466263,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.459037998423877\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7101326191883409,\n \"min\": 0.4444247053072128,\n \"max\": 1.9166610431503668,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.4444247053072128\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23720193164730496,\n \"min\": 0.0586168165866288,\n \"max\": 0.6193693756574479,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3856569753040291\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.33666690612984057,\n \"min\": 0.0582917170082478,\n \"max\": 0.7828120931245798,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.580056537856935\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.612024803787436,\n \"min\": 2.6962511371244107,\n \"max\": 14.666591848004687,\n 
\"num_unique_values\": 4,\n \"samples\": [\n 2.6962511371244107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.58548920717556,\n \"min\": 7.391110992377837,\n \"max\": 469.5080765224527,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.391110992377837\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21332043210127052,\n \"min\": 0.0855370954165192,\n \"max\": 0.5591219336008744,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.3983998114515611\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.12658863861452183,\n \"min\": 0.0437066885577381,\n \"max\": 0.3262189446902356,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.2403535679087262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10057193880049943,\n \"min\": 0.0349972340009048,\n \"max\": 0.2611001089245355,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.1864009507132035\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "eval_df" }, - "outputs": [ - { - "data": { - "text/plain": [ - "97" - ] - }, - 
"execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortTimeCopilot220437.833920220437.833920239.9034381.9166610.0586170.05829214.666592469.5080770.0855370.0437070.034997Econ/Fin1
1bizitobs_l2c/H/shortTimeCopilot54.62852254.6285224.4590380.4444250.3856570.5800572.6962517.3911110.3984000.2403540.186401Web/CloudOps7
2bizitobs_l2c/H/mediumTimeCopilot71.80087771.8008774.8516400.4886320.4707140.7579923.3741628.4735400.5130860.2937740.232035Web/CloudOps7
3bizitobs_l2c/H/longTimeCopilot83.78648383.7864835.3405950.5669970.6193690.7828124.5851229.1534960.5591220.3262190.261100Web/CloudOps7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "len(DATASETS_WITH_TERMS)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BXupvNSFVWhG" - }, - "source": [ - "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xK8KTPic6UzR" - }, - "source": [ - "## Reproducibility statement" + "text/plain": [ + " dataset model eval_metrics/MSE[mean] \\\n", + "0 m4_weekly/W/short TimeCopilot 220437.833920 \n", + "1 bizitobs_l2c/H/short TimeCopilot 54.628522 \n", + "2 bizitobs_l2c/H/medium TimeCopilot 71.800877 \n", + "3 bizitobs_l2c/H/long TimeCopilot 83.786483 \n", + "\n", + " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", + "0 220437.833920 239.903438 1.916661 \n", + "1 54.628522 4.459038 0.444425 \n", + "2 71.800877 4.851640 0.488632 \n", + "3 83.786483 5.340595 0.566997 \n", + "\n", + " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", + "0 0.058617 0.058292 14.666592 \n", + "1 0.385657 0.580057 2.696251 \n", + "2 0.470714 0.757992 3.374162 \n", + "3 0.619369 0.782812 4.585122 \n", + "\n", + " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", + "0 469.508077 0.085537 0.043707 \n", + "1 7.391111 0.398400 0.240354 \n", + "2 8.473540 0.513086 0.293774 \n", + "3 9.153496 0.559122 0.326219 \n", + "\n", + " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", + "0 0.034997 Econ/Fin 1 \n", + "1 0.186401 Web/CloudOps 7 \n", + "2 0.232035 Web/CloudOps 7 \n", + "3 0.261100 Web/CloudOps 7 " ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if torch.cuda.is_available():\n", + " from IPython.display import display\n", + "\n", + " display(eval_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7JCiHenv6Dma" + }, + "source": [ + "You can access the complete combination of datasets with the 
following:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "RmmMtHpA6HIu" + }, + "outputs": [], + "source": [ + "from timecopilot.gift_eval.utils import DATASETS_WITH_TERMS" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "2WBJ-wjv6Kz6", + "outputId": "5245845d-7d53-4989-fff8-3dc253cdbfa0" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "g0-oVisu6XX3" - }, - "source": [ - "The TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed considering reproducibility as one of its main features. The library can replicate the official results provided by the mantainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets are tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)." 
+ "data": { + "text/plain": [ + "[('m4_yearly', 'short'), ('m4_quarterly', 'short'), ('m4_monthly', 'short')]" ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DATASETS_WITH_TERMS[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "7I9OQThW6OD8", + "outputId": "fe927d2f-212a-436f-c007-16f12cbe7efb" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0kJwJ8mX6TH2" - }, - "outputs": [], - "source": [ - "from timecopilot.models.stats import SeasonalNaive\n", - "\n", - "combinations = [\n", - " (\"m4_weekly\", \"short\"),\n", - " (\"bizitobs_l2c/H\", \"short\"),\n", - " (\"bizitobs_l2c/H\", \"medium\"),\n", - " (\"bizitobs_l2c/H\", \"long\"),\n", - "]\n", - "\n", - "for dataset_name, term in combinations:\n", - " evaluate_forecaster(\n", - " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n", - " dataset_name=dataset_name,\n", - " term=term,\n", - " output_path=f\"./results/seasonal_naive\",\n", - " storage_path=storage_path,\n", - " )\n", - "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")" + "data": { + "text/plain": [ + "97" ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DATASETS_WITH_TERMS)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BXupvNSFVWhG" + }, + "source": [ + "The code for the complete evaluation can be found in the [library's repo](https://github.com/TimeCopilot/timecopilot/tree/main/experiments/gift-eval/)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xK8KTPic6UzR" + }, + "source": [ + "## Reproducibility statement" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g0-oVisu6XX3" + }, + "source": [ + "TimeCopilot's [GIFT-Eval integration](https://timecopilot.dev/api/gift-eval/gift-eval/#timecopilot.gift_eval.eval.GIFTEval) was designed considering reproducibility as one of its main features. The library can replicate the official results provided by the maintainers of the benchmark for the [`SeasonalNaive`](https://huggingface.co/spaces/Salesforce/GIFT-Eval/tree/main/results/seasonal_naive) method. The following code replicates the Seasonal Naive performance for the datasets evaluated in this notebook. The reproducibility of the results for the rest of the datasets is tested continuously in the [library's repo](https://github.com/TimeCopilot/timecopilot/blob/main/tests/gift_eval/test_evaluation.py)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0kJwJ8mX6TH2" + }, + "outputs": [], + "source": [ + "from timecopilot.models.stats import SeasonalNaive\n", + "\n", + "combinations = [\n", + " (\"m4_weekly\", \"short\"),\n", + " (\"bizitobs_l2c/H\", \"short\"),\n", + " (\"bizitobs_l2c/H\", \"medium\"),\n", + " (\"bizitobs_l2c/H\", \"long\"),\n", + "]\n", + "\n", + "for dataset_name, term in combinations:\n", + " evaluate_forecaster(\n", + " forecaster=SeasonalNaive(alias=\"Seasonal_Naive\"),\n", + " dataset_name=dataset_name,\n", + " term=term,\n", + " output_path=f\"./results/seasonal_naive\",\n", + " storage_path=storage_path,\n", + " )\n", + "eval_df_sn = pd.read_csv(\"./results/seasonal_naive/all_results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 }, + "id": "0S-Oog_2UTCI", + "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1" + }, + "outputs": [ { - "cell_type": "code", - 
"execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 195 - }, - "id": "0S-Oog_2UTCI", - "outputId": "ae9ed968-a6b4-4f50-b6fd-24f2873f00d1" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "eval_df_sn" }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"eval_df_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 
281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "eval_df_sn" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortSeasonal_Naive453525.145918453525.145918347.9914832.7772950.0893730.09161326.631225673.4427560.1226910.0633990.060870Econ/Fin1
1bizitobs_l2c/H/shortSeasonal_Naive281.843068281.84306812.5316531.2140641.3605901.1383737.48693116.7881820.9049260.6754880.521168Web/CloudOps7
2bizitobs_l2c/H/mediumSeasonal_Naive456.373289456.37328915.6673921.5102861.6912911.40241018.53365421.3628951.2935560.9486840.904205Web/CloudOps7
3bizitobs_l2c/H/longSeasonal_Naive309.272222309.27222213.6354881.4260542.4383110.91685422.03619817.5861371.0742120.8328950.941065Web/CloudOps7
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " dataset model eval_metrics/MSE[mean] \\\n", - "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n", - "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n", - "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n", - "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n", - "\n", - " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", - "0 453525.145918 347.991483 2.777295 \n", - "1 281.843068 12.531653 1.214064 \n", - "2 456.373289 15.667392 1.510286 \n", - "3 309.272222 13.635488 1.426054 \n", - "\n", - " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", - "0 0.089373 0.091613 26.631225 \n", - "1 1.360590 1.138373 7.486931 \n", - "2 1.691291 1.402410 18.533654 \n", - "3 2.438311 0.916854 22.036198 \n", - "\n", - " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", - "0 673.442756 0.122691 0.063399 \n", - "1 16.788182 0.904926 0.675488 \n", - "2 21.362895 1.293556 0.948684 \n", - "3 17.586137 1.074212 0.832895 \n", - "\n", - " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", - "0 0.060870 Econ/Fin 1 \n", - "1 0.521168 Web/CloudOps 7 \n", - "2 0.904205 Web/CloudOps 7 \n", - "3 0.941065 Web/CloudOps 7 " - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortSeasonal_Naive453525.145918453525.145918347.9914832.7772950.0893730.09161326.631225673.4427560.1226910.0633990.060870Econ/Fin1
1bizitobs_l2c/H/shortSeasonal_Naive281.843068281.84306812.5316531.2140641.3605901.1383737.48693116.7881820.9049260.6754880.521168Web/CloudOps7
2bizitobs_l2c/H/mediumSeasonal_Naive456.373289456.37328915.6673921.5102861.6912911.40241018.53365421.3628951.2935560.9486840.904205Web/CloudOps7
3bizitobs_l2c/H/longSeasonal_Naive309.272222309.27222213.6354881.4260542.4383110.91685422.03619817.5861371.0742120.8328950.941065Web/CloudOps7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "eval_df_sn" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "id": "D1T6ar_H8Zo8" - }, - "outputs": [], - "source": [ - "official_eval_sn = pd.read_csv(\n", - " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "id": "NETa8_6Y8ip-" - }, - "outputs": [], - "source": [ - "official_eval_sn = official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()" + "text/plain": [ + " dataset model eval_metrics/MSE[mean] \\\n", + "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n", + "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n", + "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n", + "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n", + "\n", + " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", + "0 453525.145918 347.991483 2.777295 \n", + "1 281.843068 12.531653 1.214064 \n", + "2 456.373289 15.667392 1.510286 \n", + "3 309.272222 13.635488 1.426054 \n", + "\n", + " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", + "0 0.089373 0.091613 26.631225 \n", + "1 1.360590 1.138373 7.486931 \n", + "2 1.691291 1.402410 18.533654 \n", + "3 2.438311 0.916854 22.036198 \n", + "\n", + " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", + "0 673.442756 0.122691 0.063399 \n", + "1 16.788182 0.904926 0.675488 \n", + "2 21.362895 1.293556 0.948684 \n", + "3 17.586137 1.074212 0.832895 \n", + "\n", + " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", + "0 0.060870 Econ/Fin 1 \n", + "1 0.521168 Web/CloudOps 7 \n", + "2 0.904205 Web/CloudOps 7 \n", + "3 0.941065 Web/CloudOps 7 " ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval_df_sn" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": 
{ + "id": "D1T6ar_H8Zo8" + }, + "outputs": [], + "source": [ + "official_eval_sn = pd.read_csv(\n", + " \"https://huggingface.co/spaces/Salesforce/GIFT-Eval/raw/main/results/seasonal_naive/all_results.csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "NETa8_6Y8ip-" + }, + "outputs": [], + "source": [ + "official_eval_sn = (\n", + " official_eval_sn.set_index(\"dataset\").loc[eval_df_sn[\"dataset\"]].reset_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 }, + "id": "hkH2NLKMUVii", + "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 195 - }, - "id": "hkH2NLKMUVii", - "outputId": "a1fda83c-6c8c-4055-9a25-ca603e8bce29" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n 
\"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n 
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "official_eval_sn" }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"official_eval_sn\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"dataset\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"bizitobs_l2c/H/short\",\n \"bizitobs_l2c/H/long\",\n \"m4_weekly/W/short\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"model\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Seasonal_Naive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 226588.0044876525,\n \"min\": 281.8430679563492,\n \"max\": 453525.1459181487,\n \"num_unique_values\": 4,\n \"samples\": [\n 281.8430679563492\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 167.0283682913528,\n \"min\": 12.53165302579365,\n \"max\": 347.99148275123207,\n \"num_unique_values\": 4,\n \"samples\": [\n 12.53165302579365\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MASE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.707968734533615,\n \"min\": 1.214064126760004,\n \"max\": 2.777295047362158,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.214064126760004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9801583676123454,\n \"min\": 0.0893728952221883,\n \"max\": 2.4383105011700468,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.3605904339028776\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/sMAPE[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5663823688490078,\n \"min\": 0.0916128671473242,\n \"max\": 1.4024095456148358,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.138373051002047\n 
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/MSIS\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.160719192913836,\n \"min\": 7.486930567002142,\n \"max\": 26.63122519962653,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.486930567002142\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/RMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 327.4379213322898,\n \"min\": 16.788182389894065,\n \"max\": 673.442756229621,\n \"num_unique_values\": 4,\n \"samples\": [\n 16.788182389894065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/NRMSE[mean]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5095759341171591,\n \"min\": 0.1226908336142798,\n \"max\": 1.293555748999092,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.9049260260934668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/ND[0.5]\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39405258118809505,\n \"min\": 0.0633986552152626,\n \"max\": 0.9486843898499616,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.675488192208351\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"eval_metrics/mean_weighted_sum_quantile_loss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.41050985815697427,\n \"min\": 0.060870394523117,\n \"max\": 0.941065124237754,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.5211675771895117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Web/CloudOps\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_variates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 
2,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "official_eval_sn" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortSeasonal_Naive453525.145918453525.145918347.9914832.7772950.0893730.09161326.631225673.4427560.1226910.0633990.060870Econ/Fin1
1bizitobs_l2c/H/shortSeasonal_Naive281.843068281.84306812.5316531.2140641.3605901.1383737.48693116.7881820.9049260.6754880.521168Web/CloudOps7
2bizitobs_l2c/H/mediumSeasonal_Naive456.373289456.37328915.6673921.5102861.6912911.40241018.53365421.3628951.2935560.9486840.904205Web/CloudOps7
3bizitobs_l2c/H/longSeasonal_Naive309.272222309.27222213.6354881.4260542.4383110.91685422.03619817.5861371.0742120.8328950.941065Web/CloudOps7
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " dataset model eval_metrics/MSE[mean] \\\n", - "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n", - "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n", - "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n", - "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n", - "\n", - " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", - "0 453525.145918 347.991483 2.777295 \n", - "1 281.843068 12.531653 1.214064 \n", - "2 456.373289 15.667392 1.510286 \n", - "3 309.272222 13.635488 1.426054 \n", - "\n", - " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", - "0 0.089373 0.091613 26.631225 \n", - "1 1.360590 1.138373 7.486931 \n", - "2 1.691291 1.402410 18.533654 \n", - "3 2.438311 0.916854 22.036198 \n", - "\n", - " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", - "0 673.442756 0.122691 0.063399 \n", - "1 16.788182 0.904926 0.675488 \n", - "2 21.362895 1.293556 0.948684 \n", - "3 17.586137 1.074212 0.832895 \n", - "\n", - " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", - "0 0.060870 Econ/Fin 1 \n", - "1 0.521168 Web/CloudOps 7 \n", - "2 0.904205 Web/CloudOps 7 \n", - "3 0.941065 Web/CloudOps 7 " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetmodeleval_metrics/MSE[mean]eval_metrics/MSE[0.5]eval_metrics/MAE[0.5]eval_metrics/MASE[0.5]eval_metrics/MAPE[0.5]eval_metrics/sMAPE[0.5]eval_metrics/MSISeval_metrics/RMSE[mean]eval_metrics/NRMSE[mean]eval_metrics/ND[0.5]eval_metrics/mean_weighted_sum_quantile_lossdomainnum_variates
0m4_weekly/W/shortSeasonal_Naive453525.145918453525.145918347.9914832.7772950.0893730.09161326.631225673.4427560.1226910.0633990.060870Econ/Fin1
1bizitobs_l2c/H/shortSeasonal_Naive281.843068281.84306812.5316531.2140641.3605901.1383737.48693116.7881820.9049260.6754880.521168Web/CloudOps7
2bizitobs_l2c/H/mediumSeasonal_Naive456.373289456.37328915.6673921.5102861.6912911.40241018.53365421.3628951.2935560.9486840.904205Web/CloudOps7
3bizitobs_l2c/H/longSeasonal_Naive309.272222309.27222213.6354881.4260542.4383110.91685422.03619817.5861371.0742120.8328950.941065Web/CloudOps7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "official_eval_sn" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "id": "OCifh_5D9B05" - }, - "outputs": [], - "source": [ - "pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0wapKMgTFScM" - }, - "source": [ - "## Changelog\n", - "\n", - "### **2025-11-06**\n", - "\n", - "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n", - "\n", - "### **2025-08-05**\n", - "\n", - "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)." 
+ "text/plain": [ + " dataset model eval_metrics/MSE[mean] \\\n", + "0 m4_weekly/W/short Seasonal_Naive 453525.145918 \n", + "1 bizitobs_l2c/H/short Seasonal_Naive 281.843068 \n", + "2 bizitobs_l2c/H/medium Seasonal_Naive 456.373289 \n", + "3 bizitobs_l2c/H/long Seasonal_Naive 309.272222 \n", + "\n", + " eval_metrics/MSE[0.5] eval_metrics/MAE[0.5] eval_metrics/MASE[0.5] \\\n", + "0 453525.145918 347.991483 2.777295 \n", + "1 281.843068 12.531653 1.214064 \n", + "2 456.373289 15.667392 1.510286 \n", + "3 309.272222 13.635488 1.426054 \n", + "\n", + " eval_metrics/MAPE[0.5] eval_metrics/sMAPE[0.5] eval_metrics/MSIS \\\n", + "0 0.089373 0.091613 26.631225 \n", + "1 1.360590 1.138373 7.486931 \n", + "2 1.691291 1.402410 18.533654 \n", + "3 2.438311 0.916854 22.036198 \n", + "\n", + " eval_metrics/RMSE[mean] eval_metrics/NRMSE[mean] eval_metrics/ND[0.5] \\\n", + "0 673.442756 0.122691 0.063399 \n", + "1 16.788182 0.904926 0.675488 \n", + "2 21.362895 1.293556 0.948684 \n", + "3 17.586137 1.074212 0.832895 \n", + "\n", + " eval_metrics/mean_weighted_sum_quantile_loss domain num_variates \n", + "0 0.060870 Econ/Fin 1 \n", + "1 0.521168 Web/CloudOps 7 \n", + "2 0.904205 Web/CloudOps 7 \n", + "3 0.941065 Web/CloudOps 7 " ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [], - "runtime_attributes": { - "runtime_version": "2025.07" - } - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } + ], + "source": [ + "official_eval_sn" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "OCifh_5D9B05" + }, + "outputs": [], + "source": [ + 
"pd.testing.assert_frame_equal(official_eval_sn, eval_df_sn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0wapKMgTFScM" + }, + "source": [ + "## Changelog\n", + "\n", + "### **2025-11-06**\n", + "\n", + "We introduced newer models based on the most recent progress in the field: Chronos-2, TimesFM-2.5 and TiRex.\n", + "\n", + "### **2025-08-05**\n", + "\n", + "GIFT‑Eval recently [enhanced its evaluation dashboard](https://github.com/SalesforceAIResearch/gift-eval?tab=readme-ov-file#2025-08-05) with a new flag that identifies models likely affected by data leakage (i.e., having seen parts of the test set during training). While the test set itself hasn’t changed, this new insight helps us better interpret model performance. To keep our results focused on truly unseen data, we’ve excluded any flagged models from this experiment and added the Sundial model to the ensemble. The previous experiment details remain available [here](https://github.com/TimeCopilot/timecopilot/tree/v0.0.14/experiments/gift-eval)." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "runtime_attributes": { + "runtime_version": "2025.07" + } + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/docs/examples/google-llms.ipynb b/docs/examples/google-llms.ipynb index 7abb7bc6..9beba8db 100644 --- a/docs/examples/google-llms.ipynb +++ b/docs/examples/google-llms.ipynb @@ -38,6 +38,7 @@ "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()\n", "\n", "from timecopilot import TimeCopilot\n", @@ -168,7 +169,7 @@ "outputs": [], "source": [ "tc = TimeCopilot(\n", - " llm='google-gla:gemini-3-pro-preview',\n", + " llm=\"google-gla:gemini-3-pro-preview\",\n", ")" ] }, @@ -191,8 +192,8 @@ "from pydantic_ai.models.google import GoogleModel\n", "from pydantic_ai.providers.google import GoogleProvider\n", "\n", - "provider = GoogleProvider(api_key='your-api-key')\n", - "google_model = GoogleModel('gemini-3-pro-preview', provider=provider)\n", + "provider = GoogleProvider(api_key=\"your-api-key\")\n", + "google_model = GoogleModel(\"gemini-3-pro-preview\", provider=provider)\n", "tc = TimeCopilot(\n", " llm=google_model,\n", ")" diff --git a/docs/examples/llm-providers.ipynb b/docs/examples/llm-providers.ipynb index bab596fa..00af2955 100644 --- a/docs/examples/llm-providers.ipynb +++ b/docs/examples/llm-providers.ipynb @@ -35,6 +35,7 @@ "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()" ] }, @@ -46,7 +47,7 @@ "outputs": [], "source": [ "import pandas as pd\n", - "from timecopilot import TimeCopilot\n" + "from timecopilot 
import TimeCopilot" ] }, { @@ -276,10 +277,7 @@ "metadata": {}, "outputs": [], "source": [ - "tc = TimeCopilot(\n", - " llm=\"openai:gpt-4o\",\n", - " retries=3\n", - ")" + "tc = TimeCopilot(llm=\"openai:gpt-4o\", retries=3)" ] }, { @@ -365,10 +363,7 @@ "metadata": {}, "outputs": [], "source": [ - "tc = TimeCopilot(\n", - " llm='ollama:gpt-oss:20b',\n", - " retries=3\n", - ")" + "tc = TimeCopilot(llm=\"ollama:gpt-oss:20b\", retries=3)" ] }, { @@ -396,10 +391,7 @@ " provider=OllamaProvider(base_url=\"http://localhost:11434/v1\"),\n", ")\n", "\n", - "tc = TimeCopilot(\n", - " llm=llm,\n", - " retries=3\n", - ")" + "tc = TimeCopilot(llm=llm, retries=3)" ] }, { diff --git a/docs/examples/sktime.ipynb b/docs/examples/sktime.ipynb index 276e40b6..dc1ce7a9 100644 --- a/docs/examples/sktime.ipynb +++ b/docs/examples/sktime.ipynb @@ -26,6 +26,7 @@ "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()\n", "\n", "import timecopilot\n", @@ -108,10 +109,7 @@ "model_list = timecopilot.agent.DEFAULT_MODELS.copy()\n", "model_list.append(adapted_model)\n", "\n", - "tc = timecopilot.TimeCopilot(\n", - " llm=\"openai:gpt-4o\",\n", - " forecasters=model_list\n", - ")" + "tc = timecopilot.TimeCopilot(llm=\"openai:gpt-4o\", forecasters=model_list)" ] }, { diff --git a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb index e0ad916f..f61767cd 100644 --- a/docs/examples/ts-foundation-models-comparison-quickstart.ipynb +++ b/docs/examples/ts-foundation-models-comparison-quickstart.ipynb @@ -132,7 +132,7 @@ " \"https://timecopilot.s3.amazonaws.com/public/data/events_pageviews.csv\",\n", " parse_dates=[\"ds\"],\n", ")\n", - "df.head()\n" + "df.head()" ] }, { @@ -202,14 +202,13 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "tcf = TimeCopilotForecaster(\n", " models=[\n", " AutoARIMA(),\n", " Chronos(repo_id=\"amazon/chronos-bolt-small\"),\n", - " Moirai(), \n", - " 
TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"), \n", - " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"), \n", + " Moirai(),\n", + " TimesFM(repo_id=\"google/timesfm-2.5-200m-pytorch\", alias=\"TimesFM-2.5\"),\n", + " TimesFM(repo_id=\"google/timesfm-2.0-500m-pytorch\", alias=\"TimesFM-2.0\"),\n", " SeasonalNaive(),\n", " ]\n", ")" @@ -236,8 +235,14 @@ "metadata": {}, "outputs": [], "source": [ - "level = [0, 20, 40, 60, 80] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n", - "cv_df = tcf.cross_validation(df=df, h=12, level=level) " + "level = [\n", + " 0,\n", + " 20,\n", + " 40,\n", + " 60,\n", + " 80,\n", + "] # zero level is strange (it's the median/point forecast), but that comes from the required inputs by TimesFM\n", + "cv_df = tcf.cross_validation(df=df, h=12, level=level)" ] }, { @@ -605,12 +610,14 @@ ], "source": [ "eval_df = evaluate(\n", - " cv_df.drop(columns=[\"cutoff\"]), \n", - " train_df=df.query(\"ds <= '2024-08-31'\"), \n", + " cv_df.drop(columns=[\"cutoff\"]),\n", + " train_df=df.query(\"ds <= '2024-08-31'\"),\n", " metrics=[partial(mase, seasonality=12), scaled_crps],\n", " level=level,\n", ")\n", - "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(3)" + "eval_df.groupby(\"metric\").mean(numeric_only=True).T.sort_values(by=\"scaled_crps\").round(\n", + " 3\n", + ")" ] } ], diff --git a/pyproject.toml b/pyproject.toml index 0e5769f5..c70b8ab6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dependencies = [ "transformers==4.40.1 ; python_full_version < '3.13'", "transformers>=4.48,<5 ; python_full_version >= '3.13'", "tsfeatures", - "utilsforecast[plotting]", + "utilsforecast[plotting]>=0.2.15", ] description = "The GenAI Forecasting Agent · LLMs × Time Series Foundation Models" license = "MIT" diff --git a/tests/test_live.py b/tests/test_live.py index 
9d870393..01d2201f 100644 --- a/tests/test_live.py +++ b/tests/test_live.py @@ -88,12 +88,12 @@ def test_is_queryable(): retries=3, ) assert not tc.is_queryable() - result = tc.forecast( + tc.forecast( df=df, query=f"Please forecast the series with a horizon of {h} and frequency D.", ) assert tc.is_queryable() - result = tc.query("how much will change the series with id 0?") + result = tc.query("how much will the series change?") print(result.output) @@ -148,7 +148,8 @@ async def test_async_is_queryable(): query=f"Please forecast the series with a horizon of {h} and frequency D.", ) assert tc.is_queryable() - answer = await tc.query("how much will change the series with id 0?") + series_id = df["unique_id"].iloc[0] + answer = await tc.query(f"how much will change the series with id {series_id}?") print(answer.output) diff --git a/tests/utils/test_experiment_handler.py b/tests/utils/test_experiment_handler.py index caccf4f3..5340585d 100644 --- a/tests/utils/test_experiment_handler.py +++ b/tests/utils/test_experiment_handler.py @@ -7,6 +7,7 @@ from pydantic_ai.models.function import AgentInfo, FunctionModel from utilsforecast.data import generate_series from utilsforecast.evaluation import evaluate +from utilsforecast.losses import mase from utilsforecast.processing import ( backtest_splits, drop_index_if_pandas, @@ -24,8 +25,6 @@ from timecopilot.utils.experiment_handler import ( ExperimentDataset, ExperimentDatasetParser, - generate_train_cv_splits, - mase, ) @@ -233,36 +232,6 @@ def test_parse_params_no_query_infers_all(): ) -@pytest.mark.parametrize( - "freq,n_windows,h,step_size", - [ - ("H", 3, 2, 1), - ("H", 1, 12, None), - ("MS", 3, 2, 2), - ], -) -def test_generate_train_cv_splits(freq, n_windows, h, step_size): - df = generate_series(n_series=5, freq=freq) - df["unique_id"] = df["unique_id"].astype(int) - df_cv = generate_train_cv_splits_from_backtest_splits( - df=df, - n_windows=n_windows, - step_size=step_size, - h=h, - freq=freq, - ) - cutoffs = 
df_cv[["unique_id", "cutoff"]].drop_duplicates() - train_cv_splits = generate_train_cv_splits( - df=df, - cutoffs=cutoffs, - ) - p_sort_df = partial(sort_df, cols=["unique_id", "cutoff", "ds"]) - pd.testing.assert_frame_equal( - p_sort_df(df_cv), - p_sort_df(train_cv_splits), - ) - - @pytest.mark.parametrize("model", models) def test_eval(model): freq = "H" diff --git a/timecopilot/forecaster.py b/timecopilot/forecaster.py index 4c418845..a27ac5f4 100644 --- a/timecopilot/forecaster.py +++ b/timecopilot/forecaster.py @@ -96,9 +96,11 @@ def _call_models( res_df_model = fn(**known_kwargs, **kwargs) res_df_model = res_df_model.rename( columns={ - col: col.replace(self.fallback_model.alias, model.alias) - if col.startswith(self.fallback_model.alias) - else col + col: ( + col.replace(self.fallback_model.alias, model.alias) + if col.startswith(self.fallback_model.alias) + else col + ) for col in res_df_model.columns } ) diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py index 864ffe70..5e9bc4cb 100644 --- a/timecopilot/utils/experiment_handler.py +++ b/timecopilot/utils/experiment_handler.py @@ -10,7 +10,7 @@ from pydantic_ai import Agent from pydantic_ai.agent import AgentRunResult from utilsforecast.evaluation import evaluate -from utilsforecast.losses import _zero_to_nan, mae +from utilsforecast.losses import mase from ..models.utils.forecaster import ( get_seasonality, @@ -24,41 +24,6 @@ ) -def mase( - df: pd.DataFrame, - models: list[str], - seasonality: int, - train_df: pd.DataFrame, - id_col: str = "unique_id", - target_col: str = "y", -) -> pd.DataFrame: - mean_abs_err = mae(df, models, id_col, target_col) - mean_abs_err = mean_abs_err.set_index(id_col) - # assume train_df is sorted - lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality) - scale = train_df[target_col].sub(lagged).abs() - scale = scale.groupby(train_df[id_col], observed=True).mean() - scale[scale < 1e-2] = 0.0 - res = 
mean_abs_err.div(_zero_to_nan(scale), axis=0).fillna(0) - res.index.name = id_col - res = res.reset_index() - return res - - -def generate_train_cv_splits( - df: pd.DataFrame, - cutoffs: pd.DataFrame, -) -> pd.DataFrame: - """ - based on `cutoffs` (columns `unique_id`, `cutoffs`) - generates train cv splits using `df` - """ - df = df.merge(cutoffs, on="unique_id", how="outer") - df = df.query("ds <= cutoff") - df = df.reset_index(drop=True) - return df - - class DatasetParams(BaseModel): # TODO: make these required freq: str | None = Field(description="The frequency of the data", default=None) @@ -228,27 +193,14 @@ def evaluate_forecast_df( if forecast_df[model].isna().sum() > 0: print(forecast_df.loc[forecast_df[model].isna()]["unique_id"].unique()) raise ValueError(f"model {model} has NaN values") - cutoffs = forecast_df[["unique_id", "cutoff"]].drop_duplicates() - train_cv_splits = generate_train_cv_splits(df=self.df, cutoffs=cutoffs) - - def add_id_cutoff(df: pd.DataFrame): - df["id_cutoff"] = ( - df["unique_id"].astype(str) + "-" + df["cutoff"].astype(str) - ) - for df in [cutoffs, train_cv_splits, forecast_df]: - add_id_cutoff(df) partial_mase = partial(mase, seasonality=self.seasonality) eval_df = evaluate( df=forecast_df, - train_df=train_cv_splits, + train_df=self.df, metrics=[partial_mase], models=models, - id_col="id_cutoff", ) - eval_df = eval_df.merge(cutoffs, on=["id_cutoff"]) - eval_df = eval_df.drop(columns=["id_cutoff"]) - eval_df = eval_df[["unique_id", "cutoff", "metric"] + models] return eval_df diff --git a/uv.lock b/uv.lock index 39bbcae4..4f4ff21b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'linux'", @@ -7373,7 +7373,7 @@ requires-dist = [ { name = "transformers", marker = "python_full_version < '3.13'", specifier = "==4.40.1" }, { name = "transformers", marker = "python_full_version >= '3.13'", 
specifier = ">=4.48,<5" }, { name = "tsfeatures" }, - { name = "utilsforecast", extras = ["plotting"] }, + { name = "utilsforecast", extras = ["plotting"], specifier = ">=0.2.15" }, ] [package.metadata.requires-dev] @@ -8176,18 +8176,19 @@ wheels = [ [[package]] name = "utilsforecast" -version = "0.2.12" +version = "0.2.15" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "narwhals" }, { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "numpy", version = "2.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, { name = "packaging" }, { name = "pandas", version = "2.1.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/d9/21e43a7419f0356043b52f72cd0262dd497f087fca78e90aebf8201a2339/utilsforecast-0.2.12.tar.gz", hash = "sha256:73f9dfd836a721a95c349f784bd75e18a4cb7c1469800e325414e22901e9775b", size = 41524, upload-time = "2025-02-24T19:41:56.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/55/8a37bb9ce36541fd353466259a07ccfdfaf25c996f3a71d989af4d4c7ba4/utilsforecast-0.2.15.tar.gz", hash = "sha256:c36d65d698a88d0fadc93d2d6737c304c3776397c60ae551ee17aa678caf3659", size = 60609, upload-time = "2025-12-03T16:29:08.652Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/75/9b/f6336ce71f4e6ed32877309314f549192cd6b982ad6d96fd8b1b5a230870/utilsforecast-0.2.12-py3-none-any.whl", hash = "sha256:acfba80bbf44e18433c206194f3ddd89cc28ff03aa0ba744b8040795a40b7b3f", size = 42219, upload-time = "2025-02-24T19:41:53.795Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/11/6c6ee61958b8e60f634b39e2f9a004f5d1c479cb962a2001fc3c72ceed78/utilsforecast-0.2.15-py3-none-any.whl", hash = "sha256:4b43bf5107e3cba13604cd86e93b5cf4906b57105b1900ccf98b8978aabd4150", size = 40344, upload-time = "2025-12-03T16:29:07.144Z" }, ] [package.optional-dependencies]