Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
hooks:
- id: sync-with-uv
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.14.11
rev: v0.14.14
hooks:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
Expand Down
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.47.0] - 2026-01-28

### Added

- `tilebox-datasets` and `tilebox-workflows`: Added support for pandas v3.

### Changed

- `tilebox-datasets`: The `create_dataset` method of the `Client` has been removed. Use `create_or_update_dataset` instead.
Expand Down Expand Up @@ -304,7 +310,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Released under the [MIT](https://opensource.org/license/mit) license.
- Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`

[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.46.0...HEAD
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.47.0...HEAD
[0.47.0]: https://github.com/tilebox/tilebox-python/compare/v0.46.0...v0.47.0
[0.46.0]: https://github.com/tilebox/tilebox-python/compare/v0.45.0...v0.46.0
[0.45.0]: https://github.com/tilebox/tilebox-python/compare/v0.44.0...v0.45.0
[0.44.0]: https://github.com/tilebox/tilebox-python/compare/v0.43.0...v0.44.0
Expand Down
86 changes: 86 additions & 0 deletions matrix.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Matrix test configuration for testing pandas compatibility across Python versions
# Run with: pymatrix --config matrix.toml
#
# Split into scenarios per package due to pytest conftest collision when running
# multiple packages together (each has tests/conftest.py).

[[scenarios]]
name = "datasets-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
working-dir = "tilebox-datasets"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "datasets-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
working-dir = "tilebox-datasets"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "storage-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
working-dir = "tilebox-storage"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "storage-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
working-dir = "tilebox-storage"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "grpc-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
working-dir = "tilebox-grpc"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "grpc-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
working-dir = "tilebox-grpc"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "workflows-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
working-dir = "tilebox-workflows"
test-command = "pytest"
# Ignore FutureWarning: google-cloud-storage raises deprecation warning on Python 3.10
test-args = ["-v", "-W", "ignore::FutureWarning"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "workflows-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
working-dir = "tilebox-workflows"
test-command = "pytest"
test-args = ["-v"]

[scenarios.packages]
pandas = ["3.0.0"]
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ dev = [
# DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0)
"pyarrow>=17.0.0",
# some dev tooling
"ruff>=0.11.10",
"ruff>=0.14.10",
"types-protobuf>=6.30",
"junitparser>=3.2.0",
"ty>=0.0.11",
"ty>=0.0.14",
"prek>=0.2.27",
]

Expand Down
1 change: 0 additions & 1 deletion tilebox-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ dev = [
"pytest>=8.3.2",
]


[project.urls]
Homepage = "https://tilebox.com"
Documentation = "https://docs.tilebox.com/datasets/introduction"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from uuid import UUID

import pandas as pd
import pytest
from hypothesis import given, settings
from hypothesis.strategies import lists
Expand Down Expand Up @@ -152,21 +153,21 @@ def test_convert_datapoints(datapoints: list[ExampleDatapoint]) -> None: # noqa
for uuid in dataset.some_id.to_numpy():
assert isinstance(uuid, str)

# strings should be stored as object arrays, with None as the fill value if missing
# strings should be stored as object arrays, with missing values (None or NaN) as fill
if "some_string" in dataset:
for string in dataset.some_string.to_numpy():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)
if "some_repeated_string" in dataset:
for string in dataset.some_repeated_string.to_numpy().ravel():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)

# bytes should be stored as object arrays, with None as the fill value if missing
# bytes should be stored as object arrays, with missing values (None or NaN) as fill
if "some_bytes" in dataset:
for bytes_ in dataset.some_bytes.to_numpy():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)
if "some_repeated_bytes" in dataset:
for bytes_ in dataset.some_repeated_bytes.to_numpy().ravel():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)


@given(lists(example_datapoints(missing_fields=True), min_size=1, max_size=10))
Expand Down
7 changes: 6 additions & 1 deletion tilebox-datasets/tilebox/datasets/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
from types import TracebackType
from typing import Any

try:
from typing import Self # ty: ignore[unresolved-import]
except ImportError: # Self is only available in Python 3.11+
from typing_extensions import Self

from tqdm.auto import tqdm

from tilebox.datasets.query.time_interval import TimeInterval
Expand Down Expand Up @@ -42,7 +47,7 @@ def __init__(
self._actual_start_time = actual_start_time
self._total_data_points = 0

def __enter__(self) -> "TimeIntervalProgressBar":
def __enter__(self) -> Self:
self._progress_bar = tqdm(
bar_format="{l_bar}{bar}[{elapsed}<{remaining}{postfix}]",
total=self._calc_progress_seconds(self._interval.end),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from uuid import UUID

import numpy as np
import pandas as pd
from google.protobuf.descriptor import FieldDescriptor
from google.protobuf.duration_pb2 import Duration
from google.protobuf.message import Message
Expand All @@ -17,6 +18,8 @@
from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3

ScalarProtoFieldValue = Message | float | str | bool | bytes


ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None

_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
Expand Down Expand Up @@ -107,7 +110,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: DatetimeScalar) -> Timestamp | None:
if value is None or (isinstance(value, np.datetime64) and np.isnat(value)):
if is_missing(value) or (isinstance(value, np.datetime64) and np.isnat(value)):
return None
# we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
seconds, nanos = divmod(to_datetime(value, utc=True).value, 10**9)
Expand All @@ -124,10 +127,10 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
if value is None or (isinstance(value, np.timedelta64) and np.isnat(value)):
if is_missing(value) or (isinstance(value, np.timedelta64) and np.isnat(value)):
return None
# we use pandas to_timedelta function to handle a variety of input types that can be coerced to timedeltas
seconds, nanos = divmod(to_timedelta(value).value, 10**9) # type: ignore[arg-type]
seconds, nanos = divmod(to_timedelta(value).value, 10**9)
return Duration(seconds=seconds, nanos=nanos)


Expand All @@ -141,7 +144,7 @@ def from_proto(self, value: ProtoFieldValue) -> str:
return str(UUID(bytes=value.uuid))

def to_proto(self, value: str | UUID) -> UUIDMessage | None:
if not value: # None or empty string
if is_missing(value) or value == "": # missing or empty string
return None

if isinstance(value, str):
Expand All @@ -160,7 +163,7 @@ def from_proto(self, value: ProtoFieldValue) -> Any:
return from_wkb(value.wkb)

def to_proto(self, value: Any) -> Geometry | None:
if value is None:
if is_missing(value):
return None
return Geometry(wkb=value.wkb)

Expand All @@ -175,7 +178,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.x, value.y, value.z

def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
if value is None or np.all(np.isnan(value)):
if is_missing(value) or np.all(np.isnan(value)):
return None
return Vec3(x=value[0], y=value[1], z=value[2])

Expand All @@ -190,7 +193,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float, float
return value.q1, value.q2, value.q3, value.q4

def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
if value is None or np.all(np.isnan(value)):
if is_missing(value) or np.all(np.isnan(value)):
return None
return Quaternion(q1=value[0], q2=value[1], q3=value[2], q4=value[3])

Expand All @@ -205,7 +208,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float]:
return value.latitude, value.longitude

def to_proto(self, value: tuple[float, float]) -> LatLon | None:
if value is None or np.all(np.isnan(value)):
if is_missing(value) or np.all(np.isnan(value)):
return None
return LatLon(latitude=value[0], longitude=value[1])

Expand All @@ -221,7 +224,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.latitude, value.longitude, value.altitude

def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
if value is None or np.all(np.isnan(value)):
if is_missing(value) or np.all(np.isnan(value)):
return None
return LatLonAlt(latitude=value[0], longitude=value[1], altitude=value[2])

Expand Down Expand Up @@ -301,3 +304,19 @@ def _camel_to_uppercase(name: str) -> str:
'PROCESSING_LEVEL'
"""
return "".join(["_" + c.lower() if c.isupper() else c for c in name]).lstrip("_").upper()


def is_missing(value: Any) -> bool:
    """Return True when *value* is a missing/null sentinel.

    Recognizes None, np.nan, pd.NA, NaT, and the other pandas missing value
    sentinels. Needed for pandas 3.0+ compatibility, where object-dtype
    columns fill gaps with np.nan rather than None.
    """
    try:
        missing = pd.isna(value)
        return bool(missing)
    except ValueError:
        # For array-like input pd.isna yields an array of bools, and bool() on
        # such an array raises ValueError. An array - even one consisting
        # entirely of NaNs - is not itself a missing value, so report False.
        return False
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
ProtobufFieldType,
ProtoFieldValue,
infer_field_type,
is_missing,
)

IngestionData = Mapping[str, Collection[Any]] | Iterable[tuple[str, Collection[Any]]] | pd.DataFrame | xr.Dataset
def convert_values_to_proto(
    values: np.ndarray | pd.Series, field_type: ProtobufFieldType, filter_none: bool = False
) -> list[ProtoFieldValue]:
    """Convert a sequence of values into their protobuf representations.

    When filter_none is set, missing entries (as determined by is_missing) are
    dropped entirely rather than converted; otherwise every value is converted.
    """
    if not filter_none:
        return [field_type.to_proto(value) for value in values]
    return [field_type.to_proto(value) for value in values if not is_missing(value)]


Expand Down
20 changes: 7 additions & 13 deletions tilebox-datasets/tilebox/datasets/query/id_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,16 @@ def parse(cls, arg: IDIntervalLike, start_exclusive: bool = False, end_inclusive
Returns:
IDInterval: The parsed ID interval
"""
if isinstance(arg, IDInterval):
return arg

match arg:
case IDInterval(_, _, _, _):
return arg
case (UUID(), UUID()):
start: UUID = arg[0]
end: UUID = arg[1]
if isinstance(arg, tuple) and len(arg) == 2:
start, end = arg
if isinstance(start, UUID) and isinstance(end, UUID):
return IDInterval(
start_id=start,
end_id=end,
start_exclusive=start_exclusive,
end_inclusive=end_inclusive,
start_id=start, end_id=end, start_exclusive=start_exclusive, end_inclusive=end_inclusive
)
case (str(), str()):
start: str = arg[0]
end: str = arg[1]
if isinstance(start, str) and isinstance(end, str):
return IDInterval(
start_id=UUID(start),
end_id=UUID(end),
Expand Down
4 changes: 3 additions & 1 deletion tilebox-datasets/tilebox/datasets/query/time_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@

# A type alias for the different types that can be used to specify a time interval
TimeIntervalLike: TypeAlias = (
DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | "TimeInterval"
"DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | TimeInterval"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's the correct fix here, since class TimeInterval is only defined below, I'll take a look

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently there is one difference between typing.Union and the more modern | operator:

typing.Union[int, float, "SomeString"] works, but int | float | "SomeString" doesn't.

Pandas changed from typing.Union to | — that's why we got the error now, so the fix for us is to use typing.Union ourselves.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh - noticed just now that you quoted the whole thing in a string, that works too, but I feel like that's the less elegant version 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can do that 👍

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nevermind, ty doesn't seem to like the old-style union with a string in it, so I'll just go for the full string solution.

)
# once we require python >= 3.12 we can replace this with a type statement, which doesn't require a string at all
# type TimeIntervalLike = DatetimeScalar | tuple[DatetimeScalar ... | TimeInterval


@dataclass(frozen=True)
Expand Down
1 change: 0 additions & 1 deletion tilebox-grpc/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ dependencies = [
"nest-asyncio>=1.5.0",
]


[dependency-groups]
dev = ["pytest-asyncio>=0.24.0", "pytest-cov>=5.0.0", "pytest>=8.3.2"]

Expand Down
1 change: 1 addition & 0 deletions tilebox-storage/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
"folium>=0.15",
"shapely>=2",
"obstore>=0.8.0",
"boto3>=1.37.0", # required for the obstore Boto3CredentialProvider
]

[dependency-groups]
Expand Down
8 changes: 7 additions & 1 deletion tilebox-workflows/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ dependencies = [
]

[dependency-groups]
dev = ["hypothesis>=6.112.1", "pytest-cov>=5.0.0", "pytest>=8.3.2", "moto>=5"]
dev = [
"hypothesis>=6.112.1",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
"moto>=5",
"pytest-asyncio>=1.3.0",
]

[project.urls]
Homepage = "https://tilebox.com"
Expand Down
Loading
Loading