diff --git a/ci/policy.yaml b/ci/policy.yaml index 0e0f81d8a1f..27e81d58cef 100644 --- a/ci/policy.yaml +++ b/ci/policy.yaml @@ -26,6 +26,7 @@ policy: - hypothesis - pytz - pytest-reportlog + - pyarrow # transitive dependency of dask.dataframe, not an xarray dependency # these packages don't fail the CI, but will be printed in the report ignored_violations: - array-api-strict diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 29b4e76461c..35db6ba93e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -17,6 +17,71 @@ New Features Breaking Changes ~~~~~~~~~~~~~~~~ +- The minimum versions of some dependencies were changed (see table below). + Notably, the minimum ``zarr`` version is now 3.0. Zarr v2 format data is + still readable via ``zarr-python`` 3's built-in compatibility layer; however, + ``zarr-python`` 2 is no longer a supported dependency. + By `Joe Hamman <https://github.com/jhamman>`_. + +.. list-table:: + :header-rows: 1 + :widths: 30 20 20 + + * - Dependency + - Old Version + - New Version + * - boto3 + - 1.34 + - 1.37 + * - cartopy + - 0.23 + - 0.24 + * - dask-core + - 2024.6 + - 2025.2 + * - distributed + - 2024.6 + - 2025.2 + * - flox + - 0.9 + - 0.10 + * - h5netcdf + - 1.4 + - 1.5 + * - h5py + - 3.11 + - 3.13 + * - iris + - 3.9 + - 3.11 + * - lxml + - 5.1 + - 5.3 + * - matplotlib-base + - 3.8 + - 3.10 + * - numba + - 0.60 + - 0.61 + * - numbagg + - 0.8 + - 0.9 + * - packaging + - 24.1 + - 24.2 + * - rasterio + - 1.3 + - 1.4 + * - scipy + - 1.13 + - 1.15 + * - toolz + - 0.12 + - 1.0 + * - zarr + - 2.18 + - 3.0 + - Xarray will no longer by default decode a variable into a :py:class:`np.timedelta64` dtype based on the presence of a timedelta-like ``"units"`` attribute alone. 
Instead it will rely on the presence of a diff --git a/pixi.toml b/pixi.toml index 46b76c0a0c3..a55cc033335 100644 --- a/pixi.toml +++ b/pixi.toml @@ -20,7 +20,7 @@ python = "*" numpy = "*" pandas = "*" -packaging = "24.1.*" +packaging = "24.2.*" git = "*" # needed for dynamic versioning [dependencies] @@ -107,40 +107,41 @@ numpy = "1.26.*" pandas = "2.2.*" [feature.minimum-scipy.dependencies] -scipy = "1.13.*" +scipy = "1.15.*" [feature.min-versions.dependencies] array-api-strict = "2.4.*" # dependency for testing the array api compat -boto3 = "1.34.*" +boto3 = "1.37.*" bottleneck = "1.4.*" -cartopy = "0.23.*" +cartopy = "0.24.*" cftime = "1.6.*" -dask-core = "2024.6.*" -distributed = "2024.6.*" -flox = "0.9.*" -h5netcdf = "1.4.*" +dask-core = "2025.2.*" +distributed = "2025.2.*" +flox = "0.10.*" +h5netcdf = "1.5.*" # h5py and hdf5 tend to cause conflicts # for e.g. hdf5 1.12 conflicts with h5py=3.1 # prioritize bumping other packages instead -h5py = "3.11.*" +h5py = "3.13.*" hdf5 = "1.14.*" -iris = "3.9.*" -lxml = "5.1.*" # Optional dep of pydap -matplotlib-base = "3.8.*" +iris = "3.11.*" +lxml = "5.3.*" # Optional dep of pydap +matplotlib-base = "3.10.*" nc-time-axis = "1.4.*" # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) netcdf4 = "1.6.*" -numba = "0.60.*" -numbagg = "0.8.*" -packaging = "24.1.*" +numba = "0.61.*" +numbagg = "0.9.*" +packaging = "24.2.*" pint = "0.24.*" pydap = "3.5.*" -rasterio = "1.3.*" +rasterio = "1.4.*" seaborn = "0.13.*" sparse = "0.15.*" -toolz = "0.12.*" -zarr = "2.18.*" +toolz = "1.0.*" +zarr = "3.0.*" +pyarrow = "*" # required by dask.dataframe # TODO: Remove `target.unix` restriction once pandas nightly has win-64 wheels again. 
# Without this, `pixi lock` fails because it can't solve the nightly feature for win-64, diff --git a/pyproject.toml b/pyproject.toml index f8c6577b5f8..0a3fb8996c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ name = "xarray" readme = "README.md" requires-python = ">=3.11" -dependencies = ["numpy>=1.26", "packaging>=24.1", "pandas>=2.2"] +dependencies = ["numpy>=1.26", "packaging>=24.2", "pandas>=2.2"] # We don't encode minimum requirements here (though if we can write a script to # generate the text from `min_deps_check.py`, that's welcome...). We do add @@ -27,27 +27,27 @@ dependencies = ["numpy>=1.26", "packaging>=24.1", "pandas>=2.2"] [project.optional-dependencies] accel = [ - "scipy>=1.13", + "scipy>=1.15", "bottleneck", - "numbagg>=0.8", + "numbagg>=0.9", "numba>=0.62", # numba 0.62 added support for numpy 2.3 - "flox>=0.9", + "flox>=0.10", "opt_einsum", ] complete = ["xarray[accel,etc,io,parallel,viz]"] io = [ "netCDF4>=1.6.0", - "h5netcdf[h5py]>=1.4.0", + "h5netcdf[h5py]>=1.5.0", "pydap", - "scipy>=1.13", - "zarr>=2.18", + "scipy>=1.15", + "zarr>=3.0", "fsspec", "cftime", "pooch", ] etc = ["sparse>=0.15"] parallel = ["dask[complete]"] -viz = ["cartopy>=0.23", "matplotlib>=3.8", "nc-time-axis", "seaborn"] +viz = ["cartopy>=0.24", "matplotlib>=3.10", "nc-time-axis", "seaborn"] types = [ "pandas-stubs", "scipy-stubs", diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 7a5c6523bdb..4954382c3c8 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -45,7 +45,7 @@ def _data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08, decode_bytes=Tru if any(arr.dtype.kind == "S" for arr in [arr1, arr2]) and decode_bytes: arr1 = _decode_string_data(arr1) arr2 = _decode_string_data(arr2) - exact_dtypes = ["M", "m", "O", "S", "U"] + exact_dtypes = ["M", "m", "O", "S", "U", "T"] if any(arr.dtype.kind in exact_dtypes for arr in [arr1, arr2]): return duck_array_ops.array_equiv(arr1, arr2) else: diff 
--git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f674b580173..f40a39ba51a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -524,7 +524,7 @@ def check_dtypes_roundtripped(self, expected, actual): actual_dtype = actual.variables[k].dtype # TODO: check expected behavior for string dtypes more carefully - string_kinds = {"O", "S", "U"} + string_kinds = {"O", "S", "U", "T"} assert expected_dtype == actual_dtype or ( expected_dtype.kind in string_kinds and actual_dtype.kind in string_kinds diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 38ec3ef83fb..32f224e89a6 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -704,20 +704,28 @@ def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format) -> None: codec = Blosc(cname="zstd", clevel=3, shuffle=2) comp = {"compressors": (codec,)} if has_zarr_v3 else {"compressor": codec} elif zarr_format == 3: - # specifying codecs in zarr_format=3 requires importing from zarr 3 namespace - from zarr.registry import get_codec_class + import zarr - Blosc = get_codec_class("numcodecs.blosc") - comp = {"compressors": (Blosc(cname="zstd", clevel=3),)} # type: ignore[call-arg] + comp = { + "compressors": (zarr.codecs.BloscCodec(cname="zstd", clevel=3),), + } enc = {"/set2": dict.fromkeys(original_dt["/set2"].dataset.data_vars, comp)} original_dt.to_zarr(filepath, encoding=enc, zarr_format=zarr_format) with open_datatree(filepath, engine="zarr") as roundtrip_dt: compressor_key = "compressors" if has_zarr_v3 else "compressor" - assert ( - roundtrip_dt["/set2/a"].encoding[compressor_key] == comp[compressor_key] - ) + if zarr_format == 3: + # zarr v3 BloscCodec auto-tunes typesize and shuffle on write, + # so we only check the attributes we explicitly set + rt_codec = roundtrip_dt["/set2/a"].encoding[compressor_key][0] + assert rt_codec.cname.value == "zstd" + assert rt_codec.clevel 
== 3 + else: + assert ( + roundtrip_dt["/set2/a"].encoding[compressor_key] + == comp[compressor_key] + ) enc["/not/a/group"] = {"foo": "bar"} # type: ignore[dict-item] with pytest.raises(ValueError, match=r"unexpected encoding group.*"):