Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ import pandas as pd
from timecopilot import TimeCopilot

# Load the dataset
# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals); inputs
# are normalized to pandas before modeling.
# The DataFrame must include at least the following columns:
# - unique_id: Unique identifier for each time series (string)
# - ds: Date column (datetime format)
Expand Down
2 changes: 2 additions & 0 deletions docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ import pandas as pd
from timecopilot import TimeCopilot

# Load the dataset
# TimeCopilot accepts pandas DataFrames or Polars DataFrames (via narwhals); inputs
# are normalized to pandas before modeling.
# The DataFrame must include at least the following columns:
# - unique_id: Unique identifier for each time series (string)
# - ds: Date column (datetime format)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ dependencies = [
"lightgbm>=4.6.0",
"logfire>=4.7.0",
"mlforecast>=1.0.2",
"narwhals>=2.7.0",
"neuralforecast>=3.0.2",
"nixtla>=0.6.6",
"openai>=1.99.7",
Expand Down
87 changes: 87 additions & 0 deletions tests/utils/test_df_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from importlib import util
from pathlib import Path

import narwhals as nw
import pandas as pd
import pytest


def load_to_pandas():
    """Load ``to_pandas`` directly from ``timecopilot/utils/df_utils.py``.

    The file is located relative to this test file and executed as a
    standalone module named ``df_utils`` through ``importlib`` instead of
    being imported via the ``timecopilot`` package.

    Returns:
        The ``to_pandas`` attribute of the freshly executed module.

    Raises:
        RuntimeError: If no import spec, or no loader on that spec, could be
            created for the file.
    """
    base_dir = Path(__file__).resolve().parents[2]
    source_file = base_dir / "timecopilot" / "utils" / "df_utils.py"
    module_spec = util.spec_from_file_location("df_utils", source_file)
    loader = None if module_spec is None else module_spec.loader
    if loader is None:
        raise RuntimeError("Failed to load df_utils module.")
    df_utils = util.module_from_spec(module_spec)
    loader.exec_module(df_utils)
    return df_utils.to_pandas


def load_experiment_handler():
    """Load ``timecopilot/utils/experiment_handler.py`` as a standalone module.

    The file is located relative to this test file and executed through
    ``importlib`` under the top-level name ``experiment_handler``, i.e.
    without any parent package.

    NOTE(review): relative imports inside that file cannot resolve when it is
    loaded this way -- confirm the module does not rely on them.

    Returns:
        The executed module object.

    Raises:
        RuntimeError: If no import spec, or no loader on that spec, could be
            created for the file.
    """
    base_dir = Path(__file__).resolve().parents[2]
    source_file = base_dir / "timecopilot" / "utils" / "experiment_handler.py"
    module_spec = util.spec_from_file_location("experiment_handler", source_file)
    loader = None if module_spec is None else module_spec.loader
    if loader is None:
        raise RuntimeError("Failed to load experiment_handler module.")
    handler_module = util.module_from_spec(module_spec)
    loader.exec_module(handler_module)
    return handler_module


def test_to_pandas_accepts_pandas_dataframe():
    """A pandas frame given to ``to_pandas`` comes back as an equal pandas frame."""
    to_pandas = load_to_pandas()
    frame = pd.DataFrame({"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]})

    converted = to_pandas(frame)

    assert isinstance(converted, pd.DataFrame)
    pd.testing.assert_frame_equal(converted, frame)


def test_to_pandas_accepts_narwhals_dataframe():
    """A pandas-backed narwhals frame is converted to a pandas frame."""
    to_pandas = load_to_pandas()
    columns = {"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}
    wrapped = nw.from_dict(columns, backend="pandas")

    converted = to_pandas(wrapped)

    assert isinstance(converted, pd.DataFrame)
    # Column order must match the order of the input mapping.
    assert list(converted.columns) == ["unique_id", "ds", "y"]


def test_to_pandas_accepts_polars_dataframe():
    """A Polars frame is converted to a pandas frame with the same columns.

    The test is skipped when ``polars`` or ``pyarrow`` is not installed.
    """
    polars = pytest.importorskip("polars")
    pytest.importorskip("pyarrow")

    to_pandas = load_to_pandas()
    polars_frame = polars.DataFrame(
        {"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}
    )

    converted = to_pandas(polars_frame)

    assert isinstance(converted, pd.DataFrame)
    assert list(converted.columns) == ["unique_id", "ds", "y"]


def test_to_pandas_fallback_for_unknown_dataframe():
    """An unsupported input yields the supplied ``fallback`` frame."""
    to_pandas = load_to_pandas()
    expected = pd.DataFrame(
        {"unique_id": ["fallback"], "ds": ["2024-01-01"], "y": [0.0]}
    )

    # A bare ``object()`` stands in for an unrecognized dataframe type.
    converted = to_pandas(object(), fallback=expected)

    pd.testing.assert_frame_equal(converted, expected)


def test_to_pandas_raises_for_unknown_dataframe():
    """An unsupported input with no fallback raises ``TypeError``."""
    convert = load_to_pandas()
    unsupported = object()

    with pytest.raises(TypeError, match="Unsupported dataframe type"):
        convert(unsupported)


def test_validate_df_accepts_polars_dataframe():
    """``ExperimentDatasetParser._validate_df`` returns pandas for a Polars frame.

    The test is skipped when ``polars`` or ``pyarrow`` is not installed.
    """
    polars = pytest.importorskip("polars")
    pytest.importorskip("pyarrow")

    polars_frame = polars.DataFrame(
        {"unique_id": ["a"], "ds": ["2024-01-01"], "y": [1.0]}
    )
    handler_module = load_experiment_handler()

    validated = handler_module.ExperimentDatasetParser._validate_df(polars_frame)

    assert isinstance(validated, pd.DataFrame)
5 changes: 5 additions & 0 deletions timecopilot/forecaster.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd

from .models.utils.forecaster import Forecaster
from .utils.df_utils import to_pandas


class TimeCopilotForecaster(Forecaster):
Expand Down Expand Up @@ -73,6 +74,7 @@ def _call_models(
quantiles: list[float] | None,
**kwargs,
) -> pd.DataFrame:
df = to_pandas(df)
# infer just once to avoid multiple calls to _maybe_infer_freq
freq = self._maybe_infer_freq(df, freq)
res_df: pd.DataFrame | None = None
Expand Down Expand Up @@ -173,6 +175,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
return self._call_models(
"forecast",
merge_on=["unique_id", "ds"],
Expand Down Expand Up @@ -247,6 +250,7 @@ def cross_validation(
- prediction intervals if `level` is specified.
- quantile forecasts if `quantiles` is specified.
"""
df = to_pandas(df)
return self._call_models(
"cross_validation",
merge_on=["unique_id", "ds", "cutoff"],
Expand Down Expand Up @@ -316,6 +320,7 @@ def detect_anomalies(
an anomaly is defined as a value that is outside of the
prediction interval (True or False).
"""
df = to_pandas(df)
return self._call_models(
"detect_anomalies",
merge_on=["unique_id", "ds", "cutoff"],
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/adapters/sktime.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd

from ..utils.forecaster import Forecaster
from ..utils.df_utils import to_pandas

# from sktime.forecasting.base import BaseForecaster, ForecastingHorizon

Expand Down Expand Up @@ -128,6 +129,7 @@ def forecast(
"Level and quantiles are not supported for adapted sktime models yet."
)
# NOTE: may not be needed
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
forecast_horizon = np.arange(1, 1 + h)
id_col = "unique_id"
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/ensembles/median.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from ... import TimeCopilotForecaster
from ..utils.forecaster import Forecaster, QuantileConverter
from ..utils.df_utils import to_pandas


class MedianEnsemble(Forecaster):
Expand Down Expand Up @@ -116,6 +117,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
qc = QuantileConverter(level=level, quantiles=quantiles)
_fcst_df = self.tcf._call_models(
"forecast",
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/chronos.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tqdm import tqdm

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -216,6 +217,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size)
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/flowstate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tsfm_public.models.flowstate.utils.utils import get_fixed_factor

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -257,6 +258,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
dataset = TimeSeriesDataset.from_df(
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/sundial.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from transformers import AutoModelForCausalLM

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -257,6 +258,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size)
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/tabpfn.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas


class TabPFN(Forecaster):
Expand Down Expand Up @@ -199,6 +200,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
if qc.quantiles is not None and not np.allclose(
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/timegpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from nixtla import NixtlaClient

from ..utils.forecaster import Forecaster
from ...utils.df_utils import to_pandas


class TimeGPT(Forecaster):
Expand Down Expand Up @@ -129,6 +130,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
client = self._get_client()
fcst_df = client.forecast(
Expand Down
3 changes: 3 additions & 0 deletions timecopilot/models/foundation/timesfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tqdm import tqdm

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -88,6 +89,7 @@ def forecast(
level: list[int | float] | None = None,
quantiles: list[float] | None = None,
) -> pd.DataFrame:
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
if qc.quantiles is not None and len(qc.quantiles) != len(DEFAULT_QUANTILES_TFM):
Expand Down Expand Up @@ -197,6 +199,7 @@ def forecast(
level: list[int | float] | None = None,
quantiles: list[float] | None = None,
) -> pd.DataFrame:
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
if qc.quantiles is not None and len(qc.quantiles) != len(DEFAULT_QUANTILES_TFM):
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/tirex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from tqdm import tqdm

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -167,6 +168,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size)
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/foundation/toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tqdm import tqdm

from ..utils.forecaster import Forecaster, QuantileConverter
from ...utils.df_utils import to_pandas
from .utils import TimeSeriesDataset


Expand Down Expand Up @@ -222,6 +223,7 @@ def forecast(
For multi-series data, the output retains the same unique
identifiers as the input DataFrame.
"""
df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)
dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size)
Expand Down
3 changes: 3 additions & 0 deletions timecopilot/models/foundation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import torch
from utilsforecast.processing import make_future_dataframe

from ...utils.df_utils import to_pandas


class TimeSeriesDataset:
def __init__(
Expand All @@ -29,6 +31,7 @@ def from_df(
batch_size: int,
dtype: torch.dtype = torch.bfloat16,
):
df = to_pandas(df)
tensors = []
df_sorted = df.sort_values(by=["unique_id", "ds"])
for _, group in df_sorted.groupby("unique_id"):
Expand Down
2 changes: 2 additions & 0 deletions timecopilot/models/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from mlforecast.auto import AutoLightGBM, AutoMLForecast

from .utils.forecaster import Forecaster, get_seasonality
from ..utils.df_utils import to_pandas

os.environ["NIXTLA_ID_AS_COL"] = "true"

Expand Down Expand Up @@ -84,6 +85,7 @@ def forecast(
if level is not None or quantiles is not None:
raise ValueError("Level and quantiles are not supported for AutoLGBM yet.")

df = to_pandas(df)
freq = self._maybe_infer_freq(df, freq)
mf = AutoMLForecast(
models=[AutoLightGBM()],
Expand Down
3 changes: 3 additions & 0 deletions timecopilot/models/neural.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ray import tune

from .utils.forecaster import Forecaster
from ..utils.df_utils import to_pandas

os.environ["NIXTLA_ID_AS_COL"] = "true"

Expand Down Expand Up @@ -108,6 +109,7 @@ def forecast(
if level is not None or quantiles is not None:
raise ValueError("Level and quantiles are not supported for AutoNHITS yet.")

df = to_pandas(df)
inferred_freq = self._maybe_infer_freq(df, freq)
if self.config is None:
config = _AutoNHITS.get_default_config(h=h, backend="ray")
Expand Down Expand Up @@ -209,6 +211,7 @@ def forecast(
if level is not None or quantiles is not None:
raise ValueError("Level and quantiles are not supported for AutoTFT yet.")

df = to_pandas(df)
inferred_freq = self._maybe_infer_freq(df, freq)
if self.config is None:
config = _AutoTFT.get_default_config(h=h, backend="ray")
Expand Down
Loading