diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index 8d748dd..9caab57 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -13,7 +13,7 @@ from bfiocpp import TSReader, TSWriter, Seq, FileType, get_ome_xml import bfio.base_classes -from bfio.utils import clean_ome_xml_for_known_issues +from bfio.utils import clean_ome_xml_for_known_issues, detect_zarr_format import zarr @@ -54,7 +54,13 @@ def __init__(self, frontend): ) else: self._file_path, self._axes_list = self.get_zarr_array_info() - self._file_type = FileType.OmeZarrV2 + # Detect zarr format and set appropriate FileType + zarr_format = detect_zarr_format(self.frontend._file_path) + if zarr_format == 3: + self._file_type = FileType.OmeZarrV3 + else: + # Default to v2 for v2 format or unknown + self._file_type = FileType.OmeZarrV2 self._rdr = TSReader(self._file_path, self._file_type, self._axes_list) self.X = self._rdr._X @@ -65,7 +71,14 @@ def __init__(self, frontend): self.data_type = self._rdr._datatype def _list_zarr_children(self, path, child_type="array"): - """Filesystem-based fallback for enumerating zarr v2 store children.""" + """Filesystem-based fallback for enumerating zarr v2 store children. + + In zarr-python v3, array_keys()/group_keys() are unreliable for v2 stores. + This function checks subdirectories for .zarray or .zgroup marker files. + + NOTE: Only use this for v2 stores. + For v3 stores, use root.array_keys()/group_keys(). + """ from pathlib import Path as _Path p = _Path(path) @@ -77,6 +90,31 @@ def _list_zarr_children(self, path, child_type="array"): children.append(child.name) return sorted(children) + def _get_zarr_children(self, root, root_path, child_type="array", is_v3=False): + """Get child arrays or groups using the appropriate method for the zarr format. + + For v2: Uses filesystem fallback (_list_zarr_children) + For v3: Uses zarr-python's array_keys()/group_keys() methods + + Args: + root: zarr.Group object + root_path: Path to the zarr store + child_type: "array" or "group" + is_v3: True if this is a v3 format store + + Returns: + List of child names (sorted) + """ + if is_v3: + # For v3, use zarr-python methods which are reliable + if child_type == "array": + return sorted(list(root.array_keys())) + else: + return sorted(list(root.group_keys())) + else: + # For v2, use filesystem fallback + return self._list_zarr_children(str(root_path), child_type) + def get_zarr_array_info(self): self.logger.debug(f"Level is {self.frontend.level}") @@ -89,6 +127,10 @@ def get_zarr_array_info(self): root_path = self.frontend._file_path / "data.zarr" root = zarr.open(root_path.resolve(), mode="r") + # Detect zarr format to choose appropriate enumeration method + zarr_format = detect_zarr_format(root_path) + is_v3 = zarr_format == 3 + axes_list = "" store_path = str(root_path.resolve()) if self.frontend.level is None: @@ -96,15 +138,24 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list elif isinstance(root, zarr.Group): # the top level is a group, check if this has any arrays - array_keys = self._list_zarr_children(store_path, "array") + array_keys = self._get_zarr_children(root, root_path, "array", is_v3) if len(array_keys) > 0: array_key = array_keys[0] root_path = root_path / str(array_key) try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -114,14 +165,25 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list else: # need to go one more level - group_keys = self._list_zarr_children(store_path, "group") + group_keys = self._get_zarr_children( + root, root_path, "group", is_v3 + ) group_key = group_keys[0] root = root[group_key] try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -129,7 +191,9 @@ def get_zarr_array_info(self): ) sub_path = str(Path(store_path) / group_key) - sub_array_keys = self._list_zarr_children(sub_path, "array") + sub_array_keys = self._get_zarr_children( + root, Path(sub_path), "array", is_v3 + ) array_key = sub_array_keys[0] root_path = root_path / str(group_key) / str(array_key) return str(root_path.resolve()), axes_list @@ -143,14 +207,23 @@ def get_zarr_array_info(self): + "multiple resoulutions." ) elif isinstance(root, zarr.Group): - array_keys = self._list_zarr_children(store_path, "array") + array_keys = self._get_zarr_children(root, root_path, "array", is_v3) if len(array_keys) > self.frontend.level: root_path = root_path / str(self.frontend.level) try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -184,7 +257,10 @@ def read_metadata(self): self.logger.debug("read_metadata(): Reading metadata...") if self._file_type == FileType.OmeTiff: return self.read_tiff_metadata() - if self._file_type == FileType.OmeZarrV2: + if ( + self._file_type == FileType.OmeZarrV2 + or self._file_type == FileType.OmeZarrV3 + ): return self.read_zarr_metadata() def read_image(self, X, Y, Z, C, T): @@ -344,12 +420,18 @@ def _init_writer(self): self.frontend.X, ) + # Check if user wants v3 format via frontend option + # For now, default to v2 to maintain backward compatibility + # Future: could add a frontend.zarr_version attribute + file_type = FileType.OmeZarrV2 + self._writer = TSWriter( str(self.frontend._file_path.joinpath("0").resolve()), shape, (1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), self.frontend.dtype, "TCZYX", + file_type, ) self.write_metadata() diff --git a/tests/test_read.py b/tests/test_read.py index 3cd8682..a9df0ce 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -11,6 +11,7 @@ from ome_zarr.utils import download as zarr_download TEST_IMAGES = { + "ExpD_chicken_embryo_MIP.ome.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0066/ExpD_chicken_embryo_MIP.ome.zarr", "5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr", "Plate1-Blue-A-12-Scene-3-P3-F2-03.czi": "https://downloads.openmicroscopy.org/images/Zeiss-CZI/idr0011/Plate1-Blue-A_TS-Stinger/Plate1-Blue-A-12-Scene-3-P3-F2-03.czi", "0.tif": "https://osf.io/j6aer/download", @@ -340,6 +341,22 @@ def test_sub_resolution_read(self): get_dims(br) self.assertEqual(br.shape, (1350, 1351, 1, 27)) + def test_read_zarr_v3(self): + """Testing zarr v3 format (NGFF 0.5) read with zarr backend""" + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="zarr3" + ) as br: + get_dims(br) + # Verify it's using the zarr backend + self.assertEqual(br._backend_name, "zarr3") + # Verify dimensions are read correctly + self.assertEqual(br.shape, (8978, 6510)) + # Verify we can actually read data + data = br[:100, :100, 0, 0] + self.assertEqual(data.shape, (100, 100)) + # Verify dtype + self.assertEqual(br.dtype, np.uint8) + class TestZarrTSReader(unittest.TestCase): def test_get_dims(self): @@ -384,6 +401,43 @@ def test_sub_resolution_read(self): get_dims(br) self.assertEqual(br.shape, (1350, 1351, 1, 27)) + def test_read_zarr_v3(self): + """Testing zarr v3 format (NGFF 0.5) read with tensorstore backend""" + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="tensorstore" + ) as br: + get_dims(br) + # Verify it's using the tensorstore backend + self.assertEqual(br._backend_name, "tensorstore") + # Verify dimensions are read correctly + self.assertEqual(br.shape, (8978, 6510)) + # Verify we can actually read data + data = br[:100, :100, 0, 0] + self.assertEqual(data.shape, (100, 100)) + # Verify dtype + self.assertEqual(br.dtype, np.uint8) + + def test_read_zarr_v3_multi_resolution(self): + """Testing zarr v3 multi-resolution read with tensorstore backend""" + # Test resolution level 0 (highest resolution) + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), + backend="tensorstore", + level=0, + ) as br: + self.assertEqual(br._backend_name, "tensorstore") + self.assertEqual(br.shape, (8978, 6510)) + + # Test resolution level 1 + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), + backend="tensorstore", + level=1, + ) as br: + self.assertEqual(br._backend_name, "tensorstore") + # Level 1 should be downsampled by 2x + self.assertEqual(br.shape, (4489, 3255)) + class TestZarrMetadata(unittest.TestCase): def test_set_metadata(self):