diff --git a/src/tracksdata/array/_graph_array.py b/src/tracksdata/array/_graph_array.py index 018e7ae6..80418986 100644 --- a/src/tracksdata/array/_graph_array.py +++ b/src/tracksdata/array/_graph_array.py @@ -23,7 +23,7 @@ def _validate_shape( """Helper function to validate the shape argument.""" if shape is None: try: - shape = graph.metadata()["shape"] + shape = graph.metadata["shape"] except KeyError as e: raise KeyError( f"`shape` is required to `{func_name}`. " diff --git a/src/tracksdata/functional/_test/test_napari.py b/src/tracksdata/functional/_test/test_napari.py index 9b4a81dc..712cf53d 100644 --- a/src/tracksdata/functional/_test/test_napari.py +++ b/src/tracksdata/functional/_test/test_napari.py @@ -31,7 +31,7 @@ def test_napari_conversion(metadata_shape: bool) -> None: shape = (2, 10, 22, 32) if metadata_shape: - graph.update_metadata(shape=shape) + graph.metadata.update(shape=shape) arg_shape = None else: arg_shape = shape diff --git a/src/tracksdata/graph/__init__.py b/src/tracksdata/graph/__init__.py index fcf207e2..3906949b 100644 --- a/src/tracksdata/graph/__init__.py +++ b/src/tracksdata/graph/__init__.py @@ -1,10 +1,10 @@ """Graph backends for representing tracking data as directed graphs in memory or on disk.""" -from tracksdata.graph._base_graph import BaseGraph +from tracksdata.graph._base_graph import BaseGraph, MetadataView from tracksdata.graph._graph_view import GraphView from tracksdata.graph._rustworkx_graph import IndexedRXGraph, RustWorkXGraph from tracksdata.graph._sql_graph import SQLGraph InMemoryGraph = RustWorkXGraph -__all__ = ["BaseGraph", "GraphView", "InMemoryGraph", "IndexedRXGraph", "RustWorkXGraph", "SQLGraph"] +__all__ = ["BaseGraph", "GraphView", "InMemoryGraph", "IndexedRXGraph", "MetadataView", "RustWorkXGraph", "SQLGraph"] diff --git a/src/tracksdata/graph/_base_graph.py b/src/tracksdata/graph/_base_graph.py index af32927a..634aacb1 100644 --- a/src/tracksdata/graph/_base_graph.py +++ 
class MetadataView(dict[str, Any]):
    """
    Dictionary-like metadata view that syncs mutations back to the graph.

    The entries passed as `data` are stored in the underlying `dict`. Every
    mutating operation first calls the owning graph's
    `_set_metadata_with_validation` or `_remove_metadata_with_validation`
    and only afterwards changes the local entries, so a rejected or failed
    graph write never leaves the view ahead of the graph.

    Parameters
    ----------
    graph : BaseGraph
        Graph that receives every write and removal made through the view.
    data : dict[str, Any]
        Initial entries of the view. They are copied into the view without
        calling back into the graph.
    is_public : bool
        Forwarded as `is_public` to the graph's validation hooks.
    """

    # Sentinel so that `None` stays usable as an explicit `pop` default.
    _MISSING = object()

    def __init__(
        self,
        graph: "BaseGraph",
        data: dict[str, Any],
        *,
        is_public: bool = True,
    ) -> None:
        super().__init__(data)
        self._graph = graph
        self._is_public = is_public

    def __setitem__(self, key: str, value: Any) -> None:
        """Write `key` to the graph, then store it locally."""
        # NOTE(review): keys are forwarded as keyword arguments, so a key equal
        # to the hook's own parameter name ("is_public") raises TypeError.
        self._graph._set_metadata_with_validation(is_public=self._is_public, **{key: value})
        super().__setitem__(key, value)

    def __delitem__(self, key: str) -> None:
        """Remove `key` from the graph, then from the view (KeyError if absent locally)."""
        self._graph._remove_metadata_with_validation(key, is_public=self._is_public)
        super().__delitem__(key)

    def __ior__(self, other: Any) -> "MetadataView":
        """
        Support `view |= other` by routing through `update`.

        `dict.__ior__` does not call an overridden `update`, so without this
        method the in-place merge would change the view but never the graph.
        """
        self.update(other)
        return self

    def pop(self, key: str, default: Any = _MISSING) -> Any:
        """
        Remove `key` from the graph and the view and return its value.

        The key is validated even when it is absent. If it is absent,
        `default` is returned when given, otherwise `KeyError` is raised.
        """
        self._graph._validate_metadata_key(key, is_public=self._is_public)

        if key not in self:
            if default is self._MISSING:
                raise KeyError(key)
            return default

        self._graph._remove_metadata_with_validation(key, is_public=self._is_public)
        return super().pop(key)

    def popitem(self) -> tuple[str, Any]:
        """
        Remove and return the most recently inserted `(key, value)` pair.

        Raises
        ------
        KeyError
            If the view is empty.
        """
        if not self:
            raise KeyError("popitem(): dictionary is empty")
        # Peek at the last key first: the graph removal must succeed before the
        # local entry disappears, otherwise the view and the graph diverge.
        key = next(reversed(self))
        self._graph._remove_metadata_with_validation(key, is_public=self._is_public)
        return key, super().pop(key)

    def clear(self) -> None:
        """Remove every entry of the view from the graph and from the view."""
        for key in list(self):
            self._graph._remove_metadata_with_validation(key, is_public=self._is_public)
            # Drop each key as soon as the graph accepted its removal so a
            # failure midway leaves the view showing exactly what remains.
            super().__delitem__(key)

    def setdefault(self, key: str, default: Any = None) -> Any:
        """Return the value of `key`; if absent, write `default` to the graph and the view."""
        if key in self:
            return super().__getitem__(key)
        self._graph._set_metadata_with_validation(is_public=self._is_public, **{key: default})
        super().__setitem__(key, default)
        return default

    def update(self, *args, **kwargs) -> None:
        """
        Merge entries into the graph and the view.

        Accepts the same arguments as `dict.update`. All entries are sent to
        the graph in a single call before the view is changed; an empty
        update does not call the graph.
        """
        updates = dict(*args, **kwargs)
        if updates:
            self._graph._set_metadata_with_validation(is_public=self._is_public, **updates)
            super().update(updates)
backend. """ + _PRIVATE_METADATA_PREFIX = "__private_" + node_added = Signal(int, object) node_removed = Signal(int, object) node_updated = Signal(int, object, object) @@ -1187,7 +1251,8 @@ def from_other(cls: type[T], other: "BaseGraph", **kwargs) -> T: node_attrs = node_attrs.drop(DEFAULT_ATTR_KEYS.NODE_ID) graph = cls(**kwargs) - graph.update_metadata(**other.metadata()) + graph.metadata.update(other.metadata) + graph._private_metadata.update(other._private_metadata_for_copy()) current_node_attr_schemas = graph._node_attr_schemas() for k, v in other._node_attr_schemas().items(): @@ -1792,7 +1857,8 @@ def to_geff( for k, v in edge_attrs.to_dict().items() } - td_metadata = self.metadata().copy() + td_metadata = self.metadata.copy() + td_metadata.update(self._private_metadata_for_copy()) td_metadata.pop("geff", None) # avoid geff being written multiple times geff_metadata = geff.GeffMetadata( @@ -1830,57 +1896,88 @@ def to_geff( zarr_format=zarr_format, ) - @abc.abstractmethod - def metadata(self) -> dict[str, Any]: + @property + def metadata(self) -> MetadataView: """ Return the metadata of the graph. Returns ------- - dict[str, Any] + MetadataView The metadata of the graph as a dictionary. Examples -------- ```python - metadata = graph.metadata() + metadata = graph.metadata print(metadata["shape"]) ``` """ + return MetadataView( + graph=self, + data={k: v for k, v in self._metadata().items() if not self._is_private_metadata_key(k)}, + is_public=True, + ) - @abc.abstractmethod - def update_metadata(self, **kwargs) -> None: + @property + def _private_metadata(self) -> MetadataView: + return MetadataView( + graph=self, + data={k: v for k, v in self._metadata().items() if self._is_private_metadata_key(k)}, + is_public=False, + ) + + def _private_metadata_for_copy(self) -> dict[str, Any]: """ - Set or update metadata for the graph. + Return private metadata entries that should be propagated by `from_other` or `to_geff`. 
+ Backends can override this to exclude backend-specific private metadata. + """ + return dict(self._private_metadata) - Parameters - ---------- - **kwargs : Any - The metadata items to set by key. Values will be stored as JSON. + @classmethod + def _is_private_metadata_key(cls, key: str) -> bool: + return key.startswith(cls._PRIVATE_METADATA_PREFIX) + + def _validate_metadata_key(self, key: str, *, is_public: bool) -> None: + if not isinstance(key, str): + raise TypeError(f"Metadata key must be a string. Got {type(key)}.") + is_private_key = self._is_private_metadata_key(key) + if is_public and is_private_key: + raise ValueError(f"Metadata key '{key}' is reserved for internal use.") + if not is_public and not is_private_key: + raise ValueError( + f"Metadata key '{key}' is not private. Private metadata keys must start with " + f"'{self._PRIVATE_METADATA_PREFIX}'." + ) - Examples - -------- - ```python - graph.update_metadata(shape=[1, 25, 25], path="path/to/image.ome.zarr") - graph.update_metadata(description="Tracking data from experiment 1") - ``` - """ + def _validate_metadata_keys(self, keys: Sequence[str], *, is_public: bool) -> None: + for key in keys: + self._validate_metadata_key(key, is_public=is_public) + + def _set_metadata_with_validation(self, is_public: bool = True, **kwargs) -> None: + self._validate_metadata_keys(kwargs.keys(), is_public=is_public) + self._update_metadata(**kwargs) + + def _remove_metadata_with_validation(self, key: str, *, is_public: bool = True) -> None: + self._validate_metadata_key(key, is_public=is_public) + self._remove_metadata(key) @abc.abstractmethod - def remove_metadata(self, key: str) -> None: + def _metadata(self) -> dict[str, Any]: + """ + Return the full metadata including private keys. """ - Remove a metadata key from the graph. - Parameters - ---------- - key : str - The key of the metadata to remove. 
+ @abc.abstractmethod + def _update_metadata(self, **kwargs) -> None: + """ + Backend-specific metadata update implementation without public key validation. + """ - Examples - -------- - ```python - graph.remove_metadata("shape") - ``` + @abc.abstractmethod + def _remove_metadata(self, key: str) -> None: + """ + Backend-specific metadata removal implementation without public key validation. """ def to_traccuracy_graph(self, array_view_kwargs: dict[str, Any] | None = None) -> "TrackingGraph": diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py index 022429e9..4efa1df2 100644 --- a/src/tracksdata/graph/_graph_view.py +++ b/src/tracksdata/graph/_graph_view.py @@ -870,11 +870,11 @@ def copy(self, **kwargs) -> "GraphView": "Use `detach` to create a new reference-less graph with the same nodes and edges." ) - def metadata(self) -> dict[str, Any]: - return self._root.metadata() + def _metadata(self) -> dict[str, Any]: + return self._root._metadata() - def update_metadata(self, **kwargs) -> None: - self._root.update_metadata(**kwargs) + def _update_metadata(self, **kwargs) -> None: + self._root._update_metadata(**kwargs) - def remove_metadata(self, key: str) -> None: - self._root.remove_metadata(key) + def _remove_metadata(self, key: str) -> None: + self._root._remove_metadata(key) diff --git a/src/tracksdata/graph/_rustworkx_graph.py b/src/tracksdata/graph/_rustworkx_graph.py index a07d509a..e8c1cac1 100644 --- a/src/tracksdata/graph/_rustworkx_graph.py +++ b/src/tracksdata/graph/_rustworkx_graph.py @@ -371,7 +371,7 @@ def __init__(self, rx_graph: rx.PyDiGraph | None = None) -> None: elif not isinstance(self._graph.attrs, dict): LOG.warning( - "previous attribute %s will be added to key 'old_attrs' of `graph.metadata()`", + "previous attribute %s will be added to key 'old_attrs' of `graph.metadata`", self._graph.attrs, ) self._graph.attrs = { @@ -1516,13 +1516,13 @@ def edge_id(self, source_id: int, target_id: int) -> int: """ return 
self.rx_graph.get_edge_data(source_id, target_id)[DEFAULT_ATTR_KEYS.EDGE_ID] - def metadata(self) -> dict[str, Any]: + def _metadata(self) -> dict[str, Any]: return self._graph.attrs - def update_metadata(self, **kwargs) -> None: + def _update_metadata(self, **kwargs) -> None: self._graph.attrs.update(kwargs) - def remove_metadata(self, key: str) -> None: + def _remove_metadata(self, key: str) -> None: self._graph.attrs.pop(key, None) def edge_list(self) -> list[list[int, int]]: diff --git a/src/tracksdata/graph/_sql_graph.py b/src/tracksdata/graph/_sql_graph.py index cb7682b6..e3eb9a40 100644 --- a/src/tracksdata/graph/_sql_graph.py +++ b/src/tracksdata/graph/_sql_graph.py @@ -2076,19 +2076,19 @@ def remove_edge( raise ValueError(f"Edge {edge_id} does not exist in the graph.") session.commit() - def metadata(self) -> dict[str, Any]: + def _metadata(self) -> dict[str, Any]: with Session(self._engine) as session: result = session.query(self.Metadata).all() return {row.key: row.value for row in result} - def update_metadata(self, **kwargs) -> None: + def _update_metadata(self, **kwargs) -> None: with Session(self._engine) as session: for key, value in kwargs.items(): metadata_entry = self.Metadata(key=key, value=value) session.merge(metadata_entry) session.commit() - def remove_metadata(self, key: str) -> None: + def _remove_metadata(self, key: str) -> None: with Session(self._engine) as session: session.query(self.Metadata).filter(self.Metadata.key == key).delete() session.commit() diff --git a/src/tracksdata/graph/_test/test_graph_backends.py b/src/tracksdata/graph/_test/test_graph_backends.py index 0df5a492..1f943ed7 100644 --- a/src/tracksdata/graph/_test/test_graph_backends.py +++ b/src/tracksdata/graph/_test/test_graph_backends.py @@ -1359,7 +1359,7 @@ def test_from_other_with_edges( ) -> None: """Ensure from_other preserves structure across backend conversions.""" # Create source graph with nodes, edges, and attributes - 
graph_backend.update_metadata(special_key="special_value") + graph_backend.metadata.update(special_key="special_value") graph_backend.add_node_attr_key("x", dtype=pl.Float64) graph_backend.add_edge_attr_key("weight", dtype=pl.Float64, default_value=-1) @@ -1386,7 +1386,7 @@ def test_from_other_with_edges( assert set(new_graph.node_attr_keys()) == set(graph_backend.node_attr_keys()) assert set(new_graph.edge_attr_keys()) == set(graph_backend.edge_attr_keys()) - assert new_graph.metadata() == graph_backend.metadata() + assert new_graph.metadata == graph_backend.metadata assert new_graph._node_attr_schemas() == graph_backend._node_attr_schemas() assert new_graph._edge_attr_schemas() == graph_backend._edge_attr_schemas() @@ -2341,7 +2341,7 @@ def _fill_mock_geff_graph(graph_backend: BaseGraph) -> None: graph_backend.add_edge_attr_key("weight", pl.Float16) - graph_backend.update_metadata( + graph_backend.metadata.update( shape=[1, 25, 25], path="path/to/image.ome.zarr", ) @@ -2402,11 +2402,11 @@ def test_geff_roundtrip(graph_backend: BaseGraph) -> None: geff_graph, _ = IndexedRXGraph.from_geff(output_store) - assert "geff" in geff_graph.metadata() + assert "geff" in geff_graph.metadata # geff metadata was not stored in original graph - geff_graph.metadata().pop("geff") - assert geff_graph.metadata() == graph_backend.metadata() + geff_graph.metadata.pop("geff") + assert geff_graph.metadata == graph_backend.metadata assert geff_graph.num_nodes() == 3 assert geff_graph.num_edges() == 2 @@ -2461,11 +2461,11 @@ def test_geff_with_keymapping(graph_backend: BaseGraph) -> None: edge_attr_key_map={"weight": "weight_new"}, ) - assert "geff" in geff_graph.metadata() + assert "geff" in geff_graph.metadata # geff metadata was not stored in original graph - geff_graph.metadata().pop("geff") - assert geff_graph.metadata() == graph_backend.metadata() + geff_graph.metadata.pop("geff") + assert geff_graph.metadata == graph_backend.metadata assert geff_graph.num_nodes() == 3 assert 
geff_graph.num_edges() == 2 @@ -2502,34 +2502,58 @@ def test_metadata_multiple_dtypes(graph_backend: BaseGraph) -> None: } # Update metadata with all test values - graph_backend.update_metadata(**test_metadata) + graph_backend.metadata.update(**test_metadata) # Retrieve and verify - retrieved = graph_backend.metadata() + retrieved = graph_backend.metadata for key, expected_value in test_metadata.items(): assert key in retrieved, f"Key '{key}' not found in metadata" assert retrieved[key] == expected_value, f"Value mismatch for '{key}': {retrieved[key]} != {expected_value}" # Test updating existing keys - graph_backend.update_metadata(string="updated_value", new_key="new_value") - retrieved = graph_backend.metadata() + graph_backend.metadata.update(string="updated_value", new_key="new_value") + retrieved = graph_backend.metadata assert retrieved["string"] == "updated_value" assert retrieved["new_key"] == "new_value" assert retrieved["integer"] == 42 # Other values unchanged # Testing removing metadata - graph_backend.remove_metadata("string") - retrieved = graph_backend.metadata() + graph_backend.metadata.pop("string", None) + retrieved = graph_backend.metadata assert "string" not in retrieved - graph_backend.remove_metadata("mixed_list") - retrieved = graph_backend.metadata() + graph_backend.metadata.pop("mixed_list", None) + retrieved = graph_backend.metadata assert "string" not in retrieved assert "mixed_list" not in retrieved +def test_private_metadata_is_hidden_from_public_apis(graph_backend: BaseGraph) -> None: + private_key = "__private_dtype_map" + + graph_backend._private_metadata.update(**{private_key: {"x": "float64"}}) + graph_backend.metadata.update(shape=[1, 2, 3]) + + public_metadata = graph_backend.metadata + assert private_key not in public_metadata + assert public_metadata["shape"] == [1, 2, 3] + + with pytest.raises(ValueError, match="reserved for internal use"): + graph_backend.metadata.update(**{private_key: {"x": "int64"}}) + + with 
pytest.raises(ValueError, match="reserved for internal use"): + graph_backend.metadata.pop(private_key, None) + + with pytest.raises(ValueError, match="is not private"): + graph_backend._private_metadata.update(shape=[1, 2, 3]) + + # Private metadata view can remove private keys. + graph_backend._private_metadata.pop(private_key, None) + assert private_key not in graph_backend._metadata() + + def test_pickle_roundtrip(graph_backend: BaseGraph) -> None: if isinstance(graph_backend, SQLGraph): pytest.skip("SQLGraph does not support pickle roundtrip") @@ -2607,7 +2631,7 @@ def test_to_traccuracy_graph(graph_backend: BaseGraph) -> None: graph_backend.add_node_attr_key("y", pl.Float64) graph_backend.add_node_attr_key(DEFAULT_ATTR_KEYS.MASK, pl.Object) graph_backend.add_node_attr_key(DEFAULT_ATTR_KEYS.BBOX, pl.Array(pl.Int64, 4)) - graph_backend.update_metadata(shape=[3, 25, 25]) + graph_backend.metadata.update(shape=[3, 25, 25]) # Create masks for first graph mask1_data = np.array([[True, True], [True, True]], dtype=bool) diff --git a/src/tracksdata/io/_test/test_ctc_io.py b/src/tracksdata/io/_test/test_ctc_io.py index 7c5fb925..01025213 100644 --- a/src/tracksdata/io/_test/test_ctc_io.py +++ b/src/tracksdata/io/_test/test_ctc_io.py @@ -68,7 +68,7 @@ def test_export_from_ctc_roundtrip(tmp_path: Path, metadata_shape: bool) -> None in_graph.add_edge(node_1, node_3, attrs={DEFAULT_ATTR_KEYS.EDGE_DIST: 1.0}) if metadata_shape: - in_graph.update_metadata(shape=(2, 4, 4)) + in_graph.metadata.update(shape=(2, 4, 4)) shape = None else: shape = (2, 4, 4) diff --git a/src/tracksdata/nodes/_regionprops.py b/src/tracksdata/nodes/_regionprops.py index c78feb32..5be49713 100644 --- a/src/tracksdata/nodes/_regionprops.py +++ b/src/tracksdata/nodes/_regionprops.py @@ -230,8 +230,8 @@ def add_nodes( axis_names = self._axis_names(labels) self._init_node_attrs(graph, axis_names, ndims=labels.ndim) - if "shape" not in graph.metadata(): - graph.update_metadata(shape=labels.shape) + if 
"shape" not in graph.metadata: + graph.metadata.update(shape=labels.shape) if t is None: time_points = range(labels.shape[0]) diff --git a/src/tracksdata/nodes/_test/test_regionprops.py b/src/tracksdata/nodes/_test/test_regionprops.py index 350d231b..567c62e0 100644 --- a/src/tracksdata/nodes/_test/test_regionprops.py +++ b/src/tracksdata/nodes/_test/test_regionprops.py @@ -79,8 +79,8 @@ def test_regionprops_add_nodes_2d() -> None: operator = RegionPropsNodes(extra_properties=extra_properties) operator.add_nodes(graph, labels=labels) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added assert graph.num_nodes() == 2 # Two regions (labels 1 and 2) @@ -115,8 +115,8 @@ def test_regionprops_add_nodes_3d() -> None: operator = RegionPropsNodes(extra_properties=extra_properties) operator.add_nodes(graph, labels=labels) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added assert graph.num_nodes() == 2 # Two regions @@ -150,8 +150,8 @@ def test_regionprops_add_nodes_with_intensity() -> None: operator.add_nodes(graph, labels=labels, intensity_image=intensity) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added with intensity attributes nodes_df = graph.node_attrs() @@ -181,8 +181,8 @@ def test_regionprops_add_nodes_timelapse(n_workers: int) -> None: with options_context(n_workers=n_workers): operator.add_nodes(graph, labels=labels) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added for 
both time points nodes_df = graph.node_attrs() @@ -209,8 +209,8 @@ def test_regionprops_add_nodes_timelapse_with_intensity() -> None: operator.add_nodes(graph, labels=labels, intensity_image=intensity) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added with intensity attributes nodes_df = graph.node_attrs() @@ -237,8 +237,8 @@ def double_area(region: RegionProperties) -> float: operator.add_nodes(graph, labels=labels, t=0) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that custom property was calculated nodes_df = graph.node_attrs() @@ -275,8 +275,8 @@ def test_regionprops_mask_creation() -> None: operator.add_nodes(graph, labels=labels, t=0) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that masks were created nodes_df = graph.node_attrs() @@ -300,8 +300,8 @@ def test_regionprops_spacing() -> None: operator.add_nodes(graph, labels=labels, t=0) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # Check that nodes were added (spacing affects internal calculations) nodes_df = graph.node_attrs() @@ -323,8 +323,8 @@ def test_regionprops_empty_labels() -> None: operator.add_nodes(graph, labels=labels, t=0) - assert "shape" in graph.metadata() - assert graph.metadata()["shape"] == labels.shape + assert "shape" in graph.metadata + assert graph.metadata["shape"] == labels.shape # No nodes should be added assert graph.num_nodes() == 0