From be561cacdbe8a6ec3f46d5bbca9e5bbbcf000313 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 28 Jan 2026 06:18:53 +0000 Subject: [PATCH 1/3] Initial plan From 7d4d30e8aaf8bcd1d861764dca45a06f6b444833 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 28 Jan 2026 07:09:10 +0000 Subject: [PATCH 2/3] Add narwhals dataframe support Co-authored-by: AzulGarza <10517170+AzulGarza@users.noreply.github.com> --- README.md | 1 + docs/getting-started/quickstart.md | 1 + pyproject.toml | 1 + tests/utils/test_df_utils.py | 83 +++++++++++++++++++++++++ timecopilot/utils/df_utils.py | 39 ++++++++++++ timecopilot/utils/experiment_handler.py | 3 + uv.lock | 2 + 7 files changed, 130 insertions(+) create mode 100644 tests/utils/test_df_utils.py create mode 100644 timecopilot/utils/df_utils.py diff --git a/README.md b/README.md index d393dcc..82fea15 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ import pandas as pd from timecopilot import TimeCopilot # Load the dataset +# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals). # The DataFrame must include at least the following columns: # - unique_id: Unique identifier for each time series (string) # - ds: Date column (datetime format) diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 3df9eef..9d18f4f 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -78,6 +78,7 @@ import pandas as pd from timecopilot import TimeCopilot # Load the dataset +# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals). 
# The DataFrame must include at least the following columns: # - unique_id: Unique identifier for each time series (string) # - ds: Date column (datetime format) diff --git a/pyproject.toml b/pyproject.toml index e995d9d..a032353 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dependencies = [ "lightgbm>=4.6.0", "logfire>=4.7.0", "mlforecast>=1.0.2", + "narwhals>=2.7.0", "neuralforecast>=3.0.2", "nixtla>=0.6.6", "openai>=1.99.7", diff --git a/tests/utils/test_df_utils.py b/tests/utils/test_df_utils.py new file mode 100644 index 0000000..3cb9c03 --- /dev/null +++ b/tests/utils/test_df_utils.py @@ -0,0 +1,83 @@ +from importlib import util +from pathlib import Path + +import narwhals as nw +import pandas as pd +import pytest + + +def load_to_pandas(): + module_path = ( + Path(__file__).resolve().parents[2] / "timecopilot" / "utils" / "df_utils.py" + ) + spec = util.spec_from_file_location("df_utils", module_path) + if spec is None or spec.loader is None: + raise RuntimeError("Failed to load df_utils module.") + module = util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.to_pandas + + +def test_to_pandas_accepts_pandas_dataframe(): + df = pd.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}) + to_pandas = load_to_pandas() + result = to_pandas(df) + assert isinstance(result, pd.DataFrame) + pd.testing.assert_frame_equal(result, df) + + +def test_to_pandas_accepts_narwhals_dataframe(): + nw_df = nw.from_dict( + {"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}, + backend="pandas", + ) + to_pandas = load_to_pandas() + result = to_pandas(nw_df) + assert isinstance(result, pd.DataFrame) + assert list(result.columns) == ["unique_id", "ds", "y"] + + +def test_to_pandas_accepts_polars_dataframe(): + pl = pytest.importorskip("polars") + pytest.importorskip("pyarrow") + + pl_df = pl.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}) + to_pandas = load_to_pandas() + result = to_pandas(pl_df) + assert 
isinstance(result, pd.DataFrame) + assert list(result.columns) == ["unique_id", "ds", "y"] + + +def test_to_pandas_fallback_for_unknown_dataframe(): + fallback = pd.DataFrame({"unique_id": ["fallback"], "ds": ["2024-01-01"], "y": [0.0]}) + to_pandas = load_to_pandas() + result = to_pandas(object(), fallback=fallback) + pd.testing.assert_frame_equal(result, fallback) + + +def test_to_pandas_raises_for_unknown_dataframe(): + to_pandas = load_to_pandas() + with pytest.raises(TypeError, match="Unsupported dataframe type"): + to_pandas(object()) + + +def test_validate_df_accepts_polars_dataframe(): + pl = pytest.importorskip("polars") + pytest.importorskip("pyarrow") + + df = pl.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}) + + module_path = ( + Path(__file__).resolve().parents[2] + / "timecopilot" + / "utils" + / "experiment_handler.py" + ) + spec = util.spec_from_file_location("experiment_handler", module_path) + if spec is None or spec.loader is None: + raise RuntimeError("Failed to load experiment_handler module.") + module = util.module_from_spec(spec) + spec.loader.exec_module(module) + + parsed = module.ExperimentDatasetParser._validate_df(df) + assert isinstance(parsed, pd.DataFrame) diff --git a/timecopilot/utils/df_utils.py b/timecopilot/utils/df_utils.py new file mode 100644 index 0000000..769b02c --- /dev/null +++ b/timecopilot/utils/df_utils.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Any, overload + +import pandas as pd + +import narwhals as nw + +_UNSUPPORTED_DF_MESSAGE = ( + "Unsupported dataframe type. Expected a pandas DataFrame or a dataframe " + "supported by narwhals (for example, polars)." +) + + +@overload +def to_pandas(df: pd.DataFrame) -> pd.DataFrame: ... + + +@overload +def to_pandas(df: Any, *, fallback: pd.DataFrame) -> pd.DataFrame: ... + + +@overload +def to_pandas(df: Any) -> pd.DataFrame: ...
+ + +def to_pandas( + df: Any, + *, + fallback: pd.DataFrame | None = None, +) -> pd.DataFrame: + if isinstance(df, pd.DataFrame): + return df + try: + return nw.from_native(df).to_pandas() + except TypeError as exc: + if fallback is not None: + return fallback + raise TypeError(_UNSUPPORTED_DF_MESSAGE) from exc diff --git a/timecopilot/utils/experiment_handler.py b/timecopilot/utils/experiment_handler.py index 5e9bc4c..46ca673 100644 --- a/timecopilot/utils/experiment_handler.py +++ b/timecopilot/utils/experiment_handler.py @@ -17,6 +17,7 @@ maybe_convert_col_to_datetime, maybe_infer_freq, ) +from .df_utils import to_pandas warnings.simplefilter( action="ignore", @@ -103,6 +104,8 @@ def read_df(path: str | Path) -> pd.DataFrame: def _validate_df(df: pd.DataFrame | str | Path) -> pd.DataFrame: if isinstance(df, str | Path): df = ExperimentDatasetParser.read_df(df) + else: + df = to_pandas(df) if "unique_id" not in df.columns: df["unique_id"] = "series_0" return maybe_convert_col_to_datetime(df, "ds") diff --git a/uv.lock b/uv.lock index 5398584..33fcb50 100644 --- a/uv.lock +++ b/uv.lock @@ -6830,6 +6830,7 @@ dependencies = [ { name = "lightgbm" }, { name = "logfire" }, { name = "mlforecast" }, + { name = "narwhals" }, { name = "neuralforecast" }, { name = "nixtla" }, { name = "openai" }, @@ -6883,6 +6884,7 @@ requires-dist = [ { name = "lightgbm", specifier = ">=4.6.0" }, { name = "logfire", specifier = ">=4.7.0" }, { name = "mlforecast", specifier = ">=1.0.2" }, + { name = "narwhals", specifier = ">=2.7.0" }, { name = "neuralforecast", specifier = ">=3.0.2" }, { name = "nixtla", specifier = ">=0.6.6" }, { name = "openai", specifier = ">=1.99.7" }, From 32a9acdb9d46214c9b6aed6e343eb07c8e4a8bba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 28 Jan 2026 19:24:04 +0000 Subject: [PATCH 3/3] Broaden polars input support Co-authored-by: AzulGarza <10517170+AzulGarza@users.noreply.github.com> --- README.md 
| 3 +- docs/getting-started/quickstart.md | 3 +- tests/utils/test_df_utils.py | 28 +++++++++++-------- timecopilot/forecaster.py | 5 ++++ timecopilot/models/adapters/sktime.py | 2 ++ timecopilot/models/ensembles/median.py | 2 ++ timecopilot/models/foundation/chronos.py | 2 ++ timecopilot/models/foundation/flowstate.py | 2 ++ timecopilot/models/foundation/sundial.py | 2 ++ timecopilot/models/foundation/tabpfn.py | 2 ++ timecopilot/models/foundation/timegpt.py | 2 ++ timecopilot/models/foundation/timesfm.py | 3 ++ timecopilot/models/foundation/tirex.py | 2 ++ timecopilot/models/foundation/toto.py | 2 ++ timecopilot/models/foundation/utils.py | 3 ++ timecopilot/models/ml.py | 2 ++ timecopilot/models/neural.py | 3 ++ timecopilot/models/stats.py | 12 ++++++++ timecopilot/models/utils/forecaster.py | 6 ++++ .../models/utils/gluonts_forecaster.py | 2 ++ .../models/utils/parallel_forecaster.py | 2 ++ 21 files changed, 76 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 82fea15..7dd991b 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,8 @@ import pandas as pd from timecopilot import TimeCopilot # Load the dataset -# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals). +# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals); inputs +# are normalized to pandas before modeling. # The DataFrame must include at least the following columns: # - unique_id: Unique identifier for each time series (string) # - ds: Date column (datetime format) diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 9d18f4f..4c6825d 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -78,7 +78,8 @@ import pandas as pd from timecopilot import TimeCopilot # Load the dataset -# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals). 
+# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals); inputs +# are normalized to pandas before modeling. # The DataFrame must include at least the following columns: # - unique_id: Unique identifier for each time series (string) # - ds: Date column (datetime format) diff --git a/tests/utils/test_df_utils.py b/tests/utils/test_df_utils.py index 3cb9c03..e605957 100644 --- a/tests/utils/test_df_utils.py +++ b/tests/utils/test_df_utils.py @@ -18,6 +18,21 @@ def load_to_pandas(): return module.to_pandas +def load_experiment_handler(): + module_path = ( + Path(__file__).resolve().parents[2] + / "timecopilot" + / "utils" + / "experiment_handler.py" + ) + spec = util.spec_from_file_location("experiment_handler", module_path) + if spec is None or spec.loader is None: + raise RuntimeError("Failed to load experiment_handler module.") + module = util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + def test_to_pandas_accepts_pandas_dataframe(): df = pd.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}) to_pandas = load_to_pandas() @@ -67,17 +82,6 @@ def test_validate_df_accepts_polars_dataframe(): df = pl.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}) - module_path = ( - Path(__file__).resolve().parents[2] - / "timecopilot" - / "utils" - / "experiment_handler.py" - ) - spec = util.spec_from_file_location("experiment_handler", module_path) - if spec is None or spec.loader is None: - raise RuntimeError("Failed to load experiment_handler module.") - module = util.module_from_spec(spec) - spec.loader.exec_module(module) - + module = load_experiment_handler() parsed = module.ExperimentDatasetParser._validate_df(df) assert isinstance(parsed, pd.DataFrame) diff --git a/timecopilot/forecaster.py b/timecopilot/forecaster.py index a27ac5f..0184d02 100644 --- a/timecopilot/forecaster.py +++ b/timecopilot/forecaster.py @@ -1,6 +1,7 @@ import pandas as pd from .models.utils.forecaster import Forecaster 
+from .utils.df_utils import to_pandas class TimeCopilotForecaster(Forecaster): @@ -73,6 +74,7 @@ def _call_models( quantiles: list[float] | None, **kwargs, ) -> pd.DataFrame: + df = to_pandas(df) # infer just once to avoid multiple calls to _maybe_infer_freq freq = self._maybe_infer_freq(df, freq) res_df: pd.DataFrame | None = None @@ -173,6 +175,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) return self._call_models( "forecast", merge_on=["unique_id", "ds"], @@ -247,6 +250,7 @@ def cross_validation( - prediction intervals if `level` is specified. - quantile forecasts if `quantiles` is specified. """ + df = to_pandas(df) return self._call_models( "cross_validation", merge_on=["unique_id", "ds", "cutoff"], @@ -316,6 +320,7 @@ def detect_anomalies( an anomaly is defined as a value that is outside of the prediction interval (True or False). """ + df = to_pandas(df) return self._call_models( "detect_anomalies", merge_on=["unique_id", "ds", "cutoff"], diff --git a/timecopilot/models/adapters/sktime.py b/timecopilot/models/adapters/sktime.py index a258226..dde500a 100644 --- a/timecopilot/models/adapters/sktime.py +++ b/timecopilot/models/adapters/sktime.py @@ -4,6 +4,7 @@ import pandas as pd from ..utils.forecaster import Forecaster +from ...utils.df_utils import to_pandas # from sktime.forecasting.base import BaseForecaster, ForecastingHorizon @@ -128,6 +129,7 @@ def forecast( "Level and quantiles are not supported for adapted sktime models yet." ) # NOTE: may not be needed + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) forecast_horizon = np.arange(1, 1 + h) id_col = "unique_id" diff --git a/timecopilot/models/ensembles/median.py b/timecopilot/models/ensembles/median.py index 7b9335e..ff69bd9 100644 --- a/timecopilot/models/ensembles/median.py +++ b/timecopilot/models/ensembles/median.py @@ -3,6 +3,7 @@ from ...
import TimeCopilotForecaster from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas class MedianEnsemble(Forecaster): @@ -116,6 +117,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) qc = QuantileConverter(level=level, quantiles=quantiles) _fcst_df = self.tcf._call_models( "forecast", diff --git a/timecopilot/models/foundation/chronos.py b/timecopilot/models/foundation/chronos.py index 95b2906..98e4785 100644 --- a/timecopilot/models/foundation/chronos.py +++ b/timecopilot/models/foundation/chronos.py @@ -12,6 +12,7 @@ from tqdm import tqdm from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -216,6 +217,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) diff --git a/timecopilot/models/foundation/flowstate.py b/timecopilot/models/foundation/flowstate.py index de9d95b..5587e33 100644 --- a/timecopilot/models/foundation/flowstate.py +++ b/timecopilot/models/foundation/flowstate.py @@ -9,6 +9,7 @@ from tsfm_public.models.flowstate.utils.utils import get_fixed_factor from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -257,6 +258,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame.
""" + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) dataset = TimeSeriesDataset.from_df( diff --git a/timecopilot/models/foundation/sundial.py b/timecopilot/models/foundation/sundial.py index 3ee6a72..0e32f20 100644 --- a/timecopilot/models/foundation/sundial.py +++ b/timecopilot/models/foundation/sundial.py @@ -12,6 +12,7 @@ from transformers import AutoModelForCausalLM from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -257,6 +258,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) diff --git a/timecopilot/models/foundation/tabpfn.py b/timecopilot/models/foundation/tabpfn.py index 44b5cd0..8cbb53b 100644 --- a/timecopilot/models/foundation/tabpfn.py +++ b/timecopilot/models/foundation/tabpfn.py @@ -26,6 +26,7 @@ ) from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas class TabPFN(Forecaster): @@ -199,6 +200,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. 
""" + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) if qc.quantiles is not None and not np.allclose( diff --git a/timecopilot/models/foundation/timegpt.py b/timecopilot/models/foundation/timegpt.py index ac88810..2ebbc6a 100644 --- a/timecopilot/models/foundation/timegpt.py +++ b/timecopilot/models/foundation/timegpt.py @@ -4,6 +4,7 @@ from nixtla import NixtlaClient from ..utils.forecaster import Forecaster +from ...utils.df_utils import to_pandas class TimeGPT(Forecaster): @@ -129,6 +130,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) client = self._get_client() fcst_df = client.forecast( diff --git a/timecopilot/models/foundation/timesfm.py b/timecopilot/models/foundation/timesfm.py index 66e89e0..c3b295a 100644 --- a/timecopilot/models/foundation/timesfm.py +++ b/timecopilot/models/foundation/timesfm.py @@ -12,6 +12,7 @@ from tqdm import tqdm from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -88,6 +89,7 @@ def forecast( level: list[int | float] | None = None, quantiles: list[float] | None = None, ) -> pd.DataFrame: + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) if qc.quantiles is not None and len(qc.quantiles) != len(DEFAULT_QUANTILES_TFM): @@ -197,6 +199,7 @@ def forecast( level: list[int | float] | None = None, quantiles: list[float] | None = None, ) -> pd.DataFrame: + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) if qc.quantiles is not None and len(qc.quantiles) != len(DEFAULT_QUANTILES_TFM): diff --git a/timecopilot/models/foundation/tirex.py b/timecopilot/models/foundation/tirex.py index e370c70..501dc81 100644 --- 
a/timecopilot/models/foundation/tirex.py +++ b/timecopilot/models/foundation/tirex.py @@ -13,6 +13,7 @@ from tqdm import tqdm from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -167,6 +168,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) diff --git a/timecopilot/models/foundation/toto.py b/timecopilot/models/foundation/toto.py index 735c4c5..e6184bd 100644 --- a/timecopilot/models/foundation/toto.py +++ b/timecopilot/models/foundation/toto.py @@ -9,6 +9,7 @@ from tqdm import tqdm from ..utils.forecaster import Forecaster, QuantileConverter +from ...utils.df_utils import to_pandas from .utils import TimeSeriesDataset @@ -222,6 +223,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. 
""" + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) diff --git a/timecopilot/models/foundation/utils.py b/timecopilot/models/foundation/utils.py index 287be32..33a7636 100644 --- a/timecopilot/models/foundation/utils.py +++ b/timecopilot/models/foundation/utils.py @@ -4,6 +4,8 @@ import torch from utilsforecast.processing import make_future_dataframe +from ...utils.df_utils import to_pandas + class TimeSeriesDataset: def __init__( @@ -29,6 +31,7 @@ def from_df( batch_size: int, dtype: torch.dtype = torch.bfloat16, ): + df = to_pandas(df) tensors = [] df_sorted = df.sort_values(by=["unique_id", "ds"]) for _, group in df_sorted.groupby("unique_id"): diff --git a/timecopilot/models/ml.py b/timecopilot/models/ml.py index e247f5c..f7144f6 100644 --- a/timecopilot/models/ml.py +++ b/timecopilot/models/ml.py @@ -4,6 +4,7 @@ from mlforecast.auto import AutoLightGBM, AutoMLForecast from .utils.forecaster import Forecaster, get_seasonality +from ..utils.df_utils import to_pandas os.environ["NIXTLA_ID_AS_COL"] = "true" @@ -84,6 +85,7 @@ def forecast( if level is not None or quantiles is not None: raise ValueError("Level and quantiles are not supported for AutoLGBM yet.") + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) mf = AutoMLForecast( models=[AutoLightGBM()], diff --git a/timecopilot/models/neural.py b/timecopilot/models/neural.py index dcc2439..92e6cb0 100644 --- a/timecopilot/models/neural.py +++ b/timecopilot/models/neural.py @@ -12,6 +12,7 @@ from ray import tune from .utils.forecaster import Forecaster +from ..utils.df_utils import to_pandas os.environ["NIXTLA_ID_AS_COL"] = "true" @@ -108,6 +109,7 @@ def forecast( if level is not None or quantiles is not None: raise ValueError("Level and quantiles are not supported for AutoNHITS yet.") + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) if self.config 
is None: config = _AutoNHITS.get_default_config(h=h, backend="ray") @@ -209,6 +211,7 @@ def forecast( if level is not None or quantiles is not None: raise ValueError("Level and quantiles are not supported for AutoTFT yet.") + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) if self.config is None: config = _AutoTFT.get_default_config(h=h, backend="ray") diff --git a/timecopilot/models/stats.py b/timecopilot/models/stats.py index e5a7b74..416c13d 100644 --- a/timecopilot/models/stats.py +++ b/timecopilot/models/stats.py @@ -41,6 +41,7 @@ from statsforecast.utils import ConformalIntervals from .utils.forecaster import Forecaster, QuantileConverter, get_seasonality +from ..utils.df_utils import to_pandas os.environ["NIXTLA_ID_AS_COL"] = "true" @@ -144,6 +145,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) fcst_df = run_statsforecast_model( model=_ADIDA(alias=self.alias), @@ -329,6 +331,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -458,6 +461,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -564,6 +568,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. 
""" + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -656,6 +661,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) fcst_df = run_statsforecast_model( model=_CrostonClassic( @@ -744,6 +750,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -833,6 +840,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) fcst_df = run_statsforecast_model( model=_HistoricAverage( @@ -921,6 +929,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) fcst_df = run_statsforecast_model( model=_IMAPA( @@ -1009,6 +1018,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -1099,6 +1109,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) season_length = self._maybe_get_seasonality(inferred_freq) fcst_df = run_statsforecast_model( @@ -1188,6 +1199,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. 
""" + df = to_pandas(df) inferred_freq = self._maybe_infer_freq(df, freq) fcst_df = run_statsforecast_model( model=_ZeroModel( diff --git a/timecopilot/models/utils/forecaster.py b/timecopilot/models/utils/forecaster.py index 42322bb..a666d11 100644 --- a/timecopilot/models/utils/forecaster.py +++ b/timecopilot/models/utils/forecaster.py @@ -23,6 +23,8 @@ ) from utilsforecast.validation import ensure_time_dtype +from ...utils.df_utils import to_pandas + def get_seasonality( freq: str, @@ -74,6 +76,7 @@ def maybe_infer_freq(df: pd.DataFrame, freq: str | None) -> str: # based on https://github.com/Nixtla/nixtla/blob/bf67c76fd473a61c72b1f54725ffbcb51a3048c5/nixtla/nixtla_client.py#L208C1-L235C25 if freq is not None: return freq + df = to_pandas(df) sizes = df["unique_id"].value_counts(sort=True) times = df.loc[df["unique_id"] == sizes.index[0], "ds"].sort_values() if times.dt.tz is not None: @@ -89,6 +92,7 @@ def maybe_infer_freq(df: pd.DataFrame, freq: str | None) -> str: def maybe_convert_col_to_datetime(df: pd.DataFrame, col_name: str) -> pd.DataFrame: + df = to_pandas(df) if not pd.api.types.is_datetime64_any_dtype(df[col_name]): df = df.copy() df[col_name] = pd.to_datetime(df[col_name]) @@ -236,6 +240,7 @@ def cross_validation( - prediction intervals if `level` is specified. - quantile forecasts if `quantiles` is specified. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) df = maybe_convert_col_to_datetime(df, "ds") # mlforecast cv code @@ -344,6 +349,7 @@ def detect_anomalies( an anomaly is defined as a value that is outside of the prediction interval (True or False). 
""" + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) df = maybe_convert_col_to_datetime(df, "ds") if h is None: diff --git a/timecopilot/models/utils/gluonts_forecaster.py b/timecopilot/models/utils/gluonts_forecaster.py index 25bc2d5..583e03a 100644 --- a/timecopilot/models/utils/gluonts_forecaster.py +++ b/timecopilot/models/utils/gluonts_forecaster.py @@ -12,6 +12,7 @@ from tqdm import tqdm from .forecaster import Forecaster, QuantileConverter +from ..utils.df_utils import to_pandas def fix_freq(freq: str) -> str: @@ -163,6 +164,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) df = maybe_convert_col_to_float32(df, "y") freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) diff --git a/timecopilot/models/utils/parallel_forecaster.py b/timecopilot/models/utils/parallel_forecaster.py index 4873f3d..6730ddb 100644 --- a/timecopilot/models/utils/parallel_forecaster.py +++ b/timecopilot/models/utils/parallel_forecaster.py @@ -5,6 +5,7 @@ import pandas as pd from .forecaster import Forecaster +from ..utils.df_utils import to_pandas class ParallelForecaster(Forecaster): @@ -102,6 +103,7 @@ def forecast( For multi-series data, the output retains the same unique identifiers as the input DataFrame. """ + df = to_pandas(df) freq = self._maybe_infer_freq(df, freq) fcst_df = self._apply_parallel( df.groupby("unique_id"),