diff --git a/docs/changelog.md b/docs/changelog.md index 9274cfa..64c58d9 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,10 @@ # Changelog +## 0.6.1 (_unreleased_) + +- Do not mutate the input dataset when decoding ({pull}`226`) +- Properly decode ellipsoids for the CF convention ({pull}`228`) + ## 0.6.0 (2026-02-05) - Support variable-sized cells with the `"zuniq"` indexing scheme in `healpix` ({pull}`207`) diff --git a/pixi.toml b/pixi.toml index 932bbf8..82cafd2 100644 --- a/pixi.toml +++ b/pixi.toml @@ -100,13 +100,14 @@ isort = "*" jupyterlab = "*" jupyter-resource-usage = "*" jupyterlab_code_formatter = "*" -python-build = ">=1.3.0,<2" -h3-py = ">=4.3.0,<5" -geopandas = ">=1.1.1,<2" -pyinstrument = ">=5.1.1,<6" -jupyterlab-myst = ">=2.4.2,<3" -h5netcdf = ">=1.7.3,<2" -zarr = ">=3.1.5,<4" +python-build = ">=1.3.0" +h3-py = ">=4.3.0" +geopandas = ">=1.1.1" +pyinstrument = ">=5.1.1" +jupyterlab-myst = ">=2.4.2" +h5netcdf = ">=1.7.3" +zarr = ">=3.1.5" +cf-xarray = ">=0.11.1" [environments] nightly = { features = [ diff --git a/xdggs/conventions/cf.py b/xdggs/conventions/cf.py index dfb74ef..ba2dfc0 100644 --- a/xdggs/conventions/cf.py +++ b/xdggs/conventions/cf.py @@ -1,4 +1,4 @@ -from collections.abc import Hashable +from collections.abc import Hashable, Sequence from typing import Any, Literal import numpy as np @@ -22,6 +22,23 @@ def remove_grid_mapping(ds, name): return new +def remove_keys(mapping: dict[str, Any], exclude: Sequence[str]) -> dict[str, Any]: + return {k: v for k, v in mapping.items() if k not in exclude} + + +ellipsoid_attribute_translations = { + "semi_major_axis": "semimajor_axis", + "semi_minor_axis": "semiminor_axis", + "earth_radius": "radius", + "reference_ellipsoid_name": "name", + "inverse_flattening": "inverse_flattening", +} + + +def extract_ellipsoid_parameters(mapping: dict[str, Any]) -> dict[str, Any]: + return {k: v for k, v in mapping.items() if k in ellipsoid_attribute_translations} + + @register_convention("cf") class Cf(Convention): def translate_keys( @@ -35,6 +52,17 @@ def translate_keys( return {translations.get(key, key): value for key, value in mapping.items()} + def translate_ellipsoid( + self, + mapping: dict[str, Any], + direction: Literal["forward", "inverse"] = "forward", + ) -> dict[str, Any] | None: + translations = ellipsoid_attribute_translations + if direction == "inverse": + translations = {v: k for k, v in translations.items()} + + return {translations.get(key, key): value for key, value in mapping.items()} + def decode( self, ds: xr.Dataset, @@ -70,7 +98,13 @@ def decode( ) name = coord_names[0] - grid_info = self.translate_keys(crs.attrs, direction="forward") + ellipsoid = extract_ellipsoid_parameters(crs.attrs) + grid_info = self.translate_keys( + remove_keys(crs.attrs, ellipsoid), direction="forward" + ) + if parsed_ellipsoid := self.translate_ellipsoid(ellipsoid, direction="forward"): + grid_info["ellipsoid"] = parsed_ellipsoid + grid_name = grid_info["grid_name"] var = ds.variables[name].copy(deep=False) @@ -100,8 +134,11 @@ def encode(self, ds: xr.Dataset, *, encoding: dict[str, Any] | None = None): grid_name = infer_grid_name(ds.dggs.index) grid_info_dict = grid_info.to_dict() - metadata = self.translate_keys(grid_info_dict, direction="inverse") + ellipsoid = grid_info_dict.pop("ellipsoid", {}) + metadata = self.translate_keys( + grid_info_dict, direction="inverse" + ) | self.translate_ellipsoid(ellipsoid, direction="inverse") crs = xr.Variable((), np.int8(0), metadata) additional_var_attrs = {"grid_mapping": grid_name} diff --git a/xdggs/conventions/xdggs.py b/xdggs/conventions/xdggs.py index 8a12d05..2181566 100644 --- a/xdggs/conventions/xdggs.py +++ b/xdggs/conventions/xdggs.py @@ -1,3 +1,4 @@ +import copy from collections.abc import Hashable from typing import Any @@ -40,17 +41,24 @@ def decode( [dim] = var.dims if grid_info is None: - grid_info = var.attrs + grid_info = copy.deepcopy(var.attrs) elif isinstance(grid_info, DGGSInfo): # TODO: avoid serializing / deserializing cycle grid_info = grid_info.to_dict() - grid_name = grid_info["grid_name"] + try: + grid_name = grid_info["grid_name"] + except KeyError: + raise DecoderError( + "xdggs convention: no grid name found (`grid_name`)." + " Try choosing a different convention or verify the dataset metadata." + ) from None + if grid_name not in GRID_REGISTRY: raise DecoderError(f"xdggs convention: unknown grid name: {grid_name}") index_cls = GRID_REGISTRY[grid_name] - var_ = var.copy(deep=True) + var_ = var.copy(deep=False) var_.attrs = grid_info index = index_cls.from_variables({name: var_}, options=index_options) @@ -65,4 +73,4 @@ def encode(self, ds: xr.Dataset, *, encoding: None = None) -> xr.Dataset: # TODO: `assign_coords` + `assign_attrs` drops the index ds_ = ds.copy(deep=False) ds_[coord].attrs.update(metadata) - return ds_ + return ds_.drop_indexes(coord) diff --git a/xdggs/ellipsoid.py b/xdggs/ellipsoid.py index 6ed702f..c58295f 100644 --- a/xdggs/ellipsoid.py +++ b/xdggs/ellipsoid.py @@ -29,7 +29,15 @@ class Ellipsoid: @classmethod def from_dict(cls, mapping): - return cls(**mapping) + semimajor = mapping["semimajor_axis"] + if (semiminor := mapping.get("semiminor_axis")) is not None: + inverse_flattening = semimajor / (semimajor - semiminor) + else: + inverse_flattening = mapping["inverse_flattening"] + name = mapping.get("name") + return cls( + semimajor_axis=semimajor, inverse_flattening=inverse_flattening, name=name + ) def to_dict(self): mapping = asdict(self) diff --git a/xdggs/h3.py b/xdggs/h3.py index 3a78065..f8175e4 100644 --- a/xdggs/h3.py +++ b/xdggs/h3.py @@ -242,5 +242,8 @@ def grid_info(self) -> H3Info: def _replace(self, new_index: xr.Index): return type(self)(new_index, self._dim, self._name, self._grid) + def __repr__(self): + return f"" + def _repr_inline_(self, max_width: int): return f"H3Index(level={self._grid.level})" diff --git a/xdggs/healpix.py b/xdggs/healpix.py index f897335..5f1f37e 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -190,6 +190,8 @@ def translate_nside(nside): def translate_ellipsoid(value): if isinstance(value, (str, Sphere, Ellipsoid)): return value + elif value is None or not value: + return value return parse_ellipsoid(value) @@ -735,5 +737,13 @@ def _replace(self, new_index: xr.Index): def grid_info(self) -> HealpixInfo: return self._grid + def __repr__(self): + return "\n".join( + [ + f"", + repr(self._grid), + ] + ) + def _repr_inline_(self, max_width: int): return f"HealpixIndex(level={self._grid.level}, indexing_scheme={self._grid.indexing_scheme}, kind={self._kind})" diff --git a/xdggs/tests/test_conventions.py b/xdggs/tests/test_conventions.py index aff20b6..c7b3eda 100644 --- a/xdggs/tests/test_conventions.py +++ b/xdggs/tests/test_conventions.py @@ -33,23 +33,34 @@ def test_decode(self, grid_info, cell_ids, name, dim): var = xr.Variable(dim, cell_ids, grid_info) index = xdggs.index.DGGSIndex.from_variables({name: var}, options={}) - expected = xr.Coordinates.from_xindex(index).to_dataset() + expected = xr.Coordinates.from_xindex(index).to_dataset().copy(deep=True) + expected[name].attrs = {} obj = xr.Dataset(coords={name: var}) + orig = obj.copy(deep=True) + actual = convention.decode( obj, grid_info=None, name=name, index_options={}, ) + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + print(obj, actual, expected) xr.testing.assert_identical(actual, expected) assert_indexes_equal(actual[name].xindexes, expected[name].xindexes) obj = xr.Dataset(coords={name: (dim, cell_ids)}) + orig = obj.copy(deep=True) actual = convention.decode( obj, grid_info=grid_info, name=name, index_options={} ) + + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + xr.testing.assert_identical(actual, expected) assert_indexes_equal(actual[name].xindexes, expected[name].xindexes) @@ -77,19 +88,22 @@ def test_encode(self, grid_info, cell_ids, name, dim): index = index_cls.from_variables({name: var}, options={}) obj = xr.Dataset(coords=xr.Coordinates({name: var}, indexes={name: index})) + orig = obj.copy(deep=True) + + expected = obj.drop_indexes(name).assign_coords( + {name: lambda ds: ds[name].assign_attrs(grid_info)} + ) - # no-op encoded = convention.encode(obj) - xr.testing.assert_identical(encoded, obj) - assert_indexes_equal(encoded.xindexes, obj.xindexes) + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + xr.testing.assert_identical(encoded, expected) + assert_indexes_equal(encoded.xindexes, expected.xindexes) -class TestCfConvention: - def translate(self, mapping): - translations = {"grid_name": "grid_mapping_name", "level": "refinement_level"} - return {translations.get(name, name): value for name, value in mapping.items()} +class TestCfConvention: def index_metadata(self, grid_info): grid_name = grid_info["grid_name"] @@ -99,19 +113,43 @@ def index_metadata(self, grid_info): ["name", "dim"], [("cell_ids", "cells"), ("zone_ids", "zones")] ) @pytest.mark.parametrize( - ["grid_info", "cell_ids"], + ["crs_attrs", "grid_info", "cell_ids"], ( - ( + pytest.param( + { + "grid_mapping_name": "healpix", + "refinement_level": 1, + "indexing_scheme": "nested", + }, {"grid_name": "healpix", "level": 1, "indexing_scheme": "nested"}, np.array([3, 6, 9], dtype="uint64"), + id="healpix", ), - ( + pytest.param( + {"grid_mapping_name": "h3", "refinement_level": 4}, {"grid_name": "h3", "level": 4}, np.array([0x832830FFFFFFFFF], dtype="uint64"), + id="h3", + ), + pytest.param( + { + "grid_mapping_name": "healpix", + "refinement_level": 1, + "indexing_scheme": "nested", + "earth_radius": 6371000.0, + }, + { + "grid_name": "healpix", + "level": 1, + "indexing_scheme": "nested", + "ellipsoid": {"radius": 6371000.0}, + }, + np.array([3, 6, 9], dtype="uint64"), + id="healpix-ellipsoid_params", ), ), ) - def test_decode(self, grid_info, cell_ids, name, dim): + def test_decode(self, crs_attrs, grid_info, cell_ids, name, dim): convention = Cf() var = xr.Variable(dim, cell_ids, grid_info) @@ -119,18 +157,23 @@ def test_decode(self, grid_info, cell_ids, name, dim): expected = xr.Coordinates.from_xindex(index).to_dataset() metadata = self.index_metadata(grid_info) - translated_grid_info = self.translate(grid_info) cell_id_var = xr.Variable(dim, cell_ids, metadata) - crs_var = xr.Variable((), np.array(0, dtype="int8"), translated_grid_info) + crs_var = xr.Variable((), np.array(0, dtype="int8"), crs_attrs) obj = xr.Dataset(coords={name: cell_id_var, "crs": crs_var}) + orig = obj.copy(deep=True) + actual = convention.decode( obj, grid_info=None, name=name, index_options={}, ) + + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + xr.testing.assert_identical(actual, expected) assert_indexes_equal(actual.xindexes, expected.xindexes) @@ -138,29 +181,54 @@ def test_decode(self, grid_info, cell_ids, name, dim): ["name", "dim"], [("cell_ids", "cells"), ("zone_ids", "zones")] ) @pytest.mark.parametrize( - ["grid_info", "cell_ids"], + ["crs_attrs", "grid_info", "cell_ids"], ( - ( + pytest.param( + { + "grid_mapping_name": "healpix", + "refinement_level": 1, + "indexing_scheme": "nested", + }, {"grid_name": "healpix", "level": 1, "indexing_scheme": "nested"}, np.array([3, 6, 9], dtype="uint64"), + id="healpix", ), - ( + pytest.param( + {"grid_mapping_name": "h3", "refinement_level": 4}, {"grid_name": "h3", "level": 4}, np.array([0x832830FFFFFFFFF], dtype="uint64"), + id="h3", + ), + pytest.param( + { + "grid_mapping_name": "healpix", + "refinement_level": 1, + "indexing_scheme": "nested", + "earth_radius": 6371000.0, + }, + { + "grid_name": "healpix", + "level": 1, + "indexing_scheme": "nested", + "ellipsoid": {"radius": 6371000.0}, + }, + np.array([3, 6, 9], dtype="uint64"), + id="healpix-ellipsoid_params", ), ), ) - def test_encode(self, grid_info, cell_ids, name, dim): + def test_encode(self, crs_attrs, grid_info, cell_ids, name, dim): convention = Cf() index_cls = xdggs.index.GRID_REGISTRY[grid_info["grid_name"]] - var = xr.Variable(dim, cell_ids, grid_info) - index = index_cls.from_variables({name: var}, options={}) + index = index_cls.from_variables( + {name: xr.Variable(dim, cell_ids, grid_info)}, options={} + ) + obj = xr.Coordinates.from_xindex(index).to_dataset() - obj = xr.Dataset(coords=xr.Coordinates({name: var}, indexes={name: index})) + orig = obj.copy(deep=True) - translated_grid_info = self.translate(grid_info) - crs = xr.Variable((), np.int8(0), translated_grid_info) + crs = xr.Variable((), np.int8(0), crs_attrs) index_var = xr.Variable(dim, cell_ids, self.index_metadata(grid_info)) expected = xr.Coordinates( {"crs": crs, name: index_var}, indexes={} @@ -168,6 +236,9 @@ def test_encode(self, grid_info, cell_ids, name, dim): encoded = convention.encode(obj) + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + xr.testing.assert_identical(encoded, expected) assert_indexes_equal(encoded.xindexes, expected.xindexes) @@ -258,6 +329,7 @@ def test_encode(self, grid_info, cell_ids, name, dim): index = index_cls.from_variables({name: var}, options={}) obj = xr.Dataset(coords=xr.Coordinates({name: var}, indexes={name: index})) + orig = obj.copy(deep=True) coord = obj.dggs.coord dggs_metadata_object = self.translate(grid_info) | { @@ -273,6 +345,9 @@ def test_encode(self, grid_info, cell_ids, name, dim): encoded = convention.encode(obj) + # should not modify the original dataset + xr.testing.assert_identical(obj, orig) + xr.testing.assert_identical(encoded, expected) assert_indexes_equal(encoded.xindexes, expected.xindexes)