From cc683acf707578c4b491895a0c2b71be639f52fa Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Fri, 13 Feb 2026 16:43:42 -0500 Subject: [PATCH 1/3] feat: Add zarr v3 support to tensorstore backend Implement automatic zarr format detection (v2 vs v3) in the tensorstore backend. The backend now correctly handles both zarr v2 and v3 stores by: - Detecting zarr format using detect_zarr_format() utility - Setting appropriate FileType (OmeZarrV2 or OmeZarrV3) for TSReader/TSWriter - Using format-specific child enumeration: * v2: filesystem fallback (_list_zarr_children) for reliability * v3: native zarr-python array_keys()/group_keys() methods - Parsing metadata from both v2 (attrs["multiscales"]) and v3 (attrs["ome"]["multiscales"]) locations per NGFF 0.5 spec Writer maintains backward compatibility by defaulting to v2 format. Added comprehensive test coverage: - test_read_zarr_v3: Basic v3 zarr reading with tensorstore backend - test_read_zarr_v3_multi_resolution: Multi-resolution v3 zarr support - Added ExpD_chicken_embryo_MIP.ome.zarr (v3) to test dataset Co-Authored-By: Claude Sonnet 4.5 --- src/bfio/ts_backends.py | 114 +++++++++++++++++++++++++++++++++------- tests/test_read.py | 54 +++++++++++++++++++ 2 files changed, 148 insertions(+), 20 deletions(-) diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index 8d748dd..9c8904b 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -13,7 +13,7 @@ from bfiocpp import TSReader, TSWriter, Seq, FileType, get_ome_xml import bfio.base_classes -from bfio.utils import clean_ome_xml_for_known_issues +from bfio.utils import clean_ome_xml_for_known_issues, detect_zarr_format import zarr @@ -54,7 +54,13 @@ def __init__(self, frontend): ) else: self._file_path, self._axes_list = self.get_zarr_array_info() - self._file_type = FileType.OmeZarrV2 + # Detect zarr format and set appropriate FileType + zarr_format = detect_zarr_format(self.frontend._file_path) + if zarr_format == 3: + self._file_type = FileType.OmeZarrV3 + else: + # Default to v2 for v2 format or unknown + self._file_type = FileType.OmeZarrV2 self._rdr = TSReader(self._file_path, self._file_type, self._axes_list) self.X = self._rdr._X @@ -65,7 +71,13 @@ def __init__(self, frontend): self.data_type = self._rdr._datatype def _list_zarr_children(self, path, child_type="array"): - """Filesystem-based fallback for enumerating zarr v2 store children.""" + """Filesystem-based fallback for enumerating zarr v2 store children. + + In zarr-python v3, array_keys()/group_keys() are unreliable for v2 stores. + This function checks subdirectories for .zarray or .zgroup marker files. + + NOTE: Only use this for v2 stores. For v3 stores, use root.array_keys()/group_keys(). + """ from pathlib import Path as _Path p = _Path(path) @@ -77,6 +89,31 @@ def _list_zarr_children(self, path, child_type="array"): children.append(child.name) return sorted(children) + def _get_zarr_children(self, root, root_path, child_type="array", is_v3=False): + """Get child arrays or groups using the appropriate method for the zarr format. + + For v2: Uses filesystem fallback (_list_zarr_children) + For v3: Uses zarr-python's array_keys()/group_keys() methods + + Args: + root: zarr.Group object + root_path: Path to the zarr store + child_type: "array" or "group" + is_v3: True if this is a v3 format store + + Returns: + List of child names (sorted) + """ + if is_v3: + # For v3, use zarr-python methods which are reliable + if child_type == "array": + return sorted(list(root.array_keys())) + else: + return sorted(list(root.group_keys())) + else: + # For v2, use filesystem fallback + return self._list_zarr_children(str(root_path), child_type) + def get_zarr_array_info(self): self.logger.debug(f"Level is {self.frontend.level}") @@ -89,6 +126,10 @@ def get_zarr_array_info(self): root_path = self.frontend._file_path / "data.zarr" root = zarr.open(root_path.resolve(), mode="r") + # Detect zarr format to choose appropriate enumeration method + zarr_format = detect_zarr_format(root_path) + is_v3 = (zarr_format == 3) + axes_list = "" store_path = str(root_path.resolve()) if self.frontend.level is None: @@ -96,15 +137,24 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list elif isinstance(root, zarr.Group): # the top level is a group, check if this has any arrays - array_keys = self._list_zarr_children(store_path, "array") + array_keys = self._get_zarr_children(root, root_path, "array", is_v3) if len(array_keys) > 0: array_key = array_keys[0] root_path = root_path / str(array_key) try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -114,14 +164,23 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list else: # need to go one more level - group_keys = self._list_zarr_children(store_path, "group") + group_keys = self._get_zarr_children(root, root_path, "group", is_v3) group_key = group_keys[0] root = root[group_key] try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -129,7 +188,7 @@ def get_zarr_array_info(self): ) sub_path = str(Path(store_path) / group_key) - sub_array_keys = self._list_zarr_children(sub_path, "array") + sub_array_keys = self._get_zarr_children(root, Path(sub_path), "array", is_v3) array_key = sub_array_keys[0] root_path = root_path / str(group_key) / str(array_key) return str(root_path.resolve()), axes_list @@ -143,14 +202,23 @@ def get_zarr_array_info(self): + "multiple resoulutions." ) elif isinstance(root, zarr.Group): - array_keys = self._list_zarr_children(store_path, "array") + array_keys = self._get_zarr_children(root, root_path, "array", is_v3) if len(array_keys) > self.frontend.level: root_path = root_path / str(self.frontend.level) try: - axes_metadata = root.attrs["multiscales"][0]["axes"] - axes_list = "".join( - axes["name"].upper() for axes in axes_metadata - ) + # Try v3 location first (NGFF 0.5) + ome_meta = root.attrs.get("ome", {}) + axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes") + if axes_metadata: + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) + else: + # Fall back to v2 location + axes_metadata = root.attrs["multiscales"][0]["axes"] + axes_list = "".join( + axes["name"].upper() for axes in axes_metadata + ) except KeyError: self.logger.warning( "Unable to find multiscales metadata. Z, C and T " @@ -184,7 +252,7 @@ def read_metadata(self): self.logger.debug("read_metadata(): Reading metadata...") if self._file_type == FileType.OmeTiff: return self.read_tiff_metadata() - if self._file_type == FileType.OmeZarrV2: + if self._file_type == FileType.OmeZarrV2 or self._file_type == FileType.OmeZarrV3: return self.read_zarr_metadata() def read_image(self, X, Y, Z, C, T): @@ -344,12 +412,18 @@ def _init_writer(self): self.frontend.X, ) + # Check if user wants v3 format via frontend option + # For now, default to v2 to maintain backward compatibility + # Future: could add a frontend.zarr_version attribute + file_type = FileType.OmeZarrV2 + self._writer = TSWriter( str(self.frontend._file_path.joinpath("0").resolve()), shape, (1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE), self.frontend.dtype, "TCZYX", + file_type, ) self.write_metadata() diff --git a/tests/test_read.py b/tests/test_read.py index 3cd8682..e4d1683 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -11,6 +11,7 @@ from ome_zarr.utils import download as zarr_download TEST_IMAGES = { + "ExpD_chicken_embryo_MIP.ome.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0066/ExpD_chicken_embryo_MIP.ome.zarr", "5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr", "Plate1-Blue-A-12-Scene-3-P3-F2-03.czi": "https://downloads.openmicroscopy.org/images/Zeiss-CZI/idr0011/Plate1-Blue-A_TS-Stinger/Plate1-Blue-A-12-Scene-3-P3-F2-03.czi", "0.tif": "https://osf.io/j6aer/download", @@ -340,6 +341,22 @@ def test_sub_resolution_read(self): get_dims(br) self.assertEqual(br.shape, (1350, 1351, 1, 27)) + def test_read_zarr_v3(self): + """Testing zarr v3 format (NGFF 0.5) read with zarr backend""" + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="zarr" + ) as br: + get_dims(br) + # Verify it's using the zarr backend + self.assertEqual(br._backend_name, "zarr") + # Verify dimensions are read correctly + self.assertEqual(br.shape, (8978, 6510, 1, 1)) + # Verify we can actually read data + data = br[:100, :100, 0, 0] + self.assertEqual(data.shape, (100, 100)) + # Verify dtype + self.assertEqual(br.dtype, np.uint8) + class TestZarrTSReader(unittest.TestCase): def test_get_dims(self): @@ -384,6 +401,43 @@ def test_sub_resolution_read(self): get_dims(br) self.assertEqual(br.shape, (1350, 1351, 1, 27)) + def test_read_zarr_v3(self): + """Testing zarr v3 format (NGFF 0.5) read with tensorstore backend""" + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="tensorstore" + ) as br: + get_dims(br) + # Verify it's using the tensorstore backend + self.assertEqual(br._backend_name, "tensorstore") + # Verify dimensions are read correctly + self.assertEqual(br.shape, (8978, 6510, 1, 1)) + # Verify we can actually read data + data = br[:100, :100, 0, 0] + self.assertEqual(data.shape, (100, 100)) + # Verify dtype + self.assertEqual(br.dtype, np.uint8) + + def test_read_zarr_v3_multi_resolution(self): + """Testing zarr v3 multi-resolution read with tensorstore backend""" + # Test resolution level 0 (highest resolution) + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), + backend="tensorstore", + level=0 + ) as br: + self.assertEqual(br._backend_name, "tensorstore") + self.assertEqual(br.shape, (8978, 6510, 1, 1)) + + # Test resolution level 1 + with bfio.BioReader( + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), + backend="tensorstore", + level=1 + ) as br: + self.assertEqual(br._backend_name, "tensorstore") + # Level 1 should be downsampled by 2x + self.assertEqual(br.shape, (4489, 3255, 1, 1)) + class TestZarrMetadata(unittest.TestCase): def test_set_metadata(self): From b5eb8ed7289b139141e7a789f701b099552e1192 Mon Sep 17 00:00:00 2001 From: Sameeul Samee Date: Tue, 17 Feb 2026 07:05:10 -0500 Subject: [PATCH 2/3] update tests --- src/bfio/ts_backends.py | 18 +++++++++++++----- tests/test_read.py | 14 +++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/bfio/ts_backends.py b/src/bfio/ts_backends.py index 9c8904b..9caab57 100644 --- a/src/bfio/ts_backends.py +++ b/src/bfio/ts_backends.py @@ -76,7 +76,8 @@ def _list_zarr_children(self, path, child_type="array"): In zarr-python v3, array_keys()/group_keys() are unreliable for v2 stores. This function checks subdirectories for .zarray or .zgroup marker files. - NOTE: Only use this for v2 stores. For v3 stores, use root.array_keys()/group_keys(). + NOTE: Only use this for v2 stores. + For v3 stores, use root.array_keys()/group_keys(). """ from pathlib import Path as _Path @@ -128,7 +129,7 @@ def get_zarr_array_info(self): # Detect zarr format to choose appropriate enumeration method zarr_format = detect_zarr_format(root_path) - is_v3 = (zarr_format == 3) + is_v3 = zarr_format == 3 axes_list = "" store_path = str(root_path.resolve()) @@ -164,7 +165,9 @@ def get_zarr_array_info(self): return str(root_path.resolve()), axes_list else: # need to go one more level - group_keys = self._get_zarr_children(root, root_path, "group", is_v3) + group_keys = self._get_zarr_children( + root, root_path, "group", is_v3 + ) group_key = group_keys[0] root = root[group_key] try: @@ -188,7 +191,9 @@ def get_zarr_array_info(self): ) sub_path = str(Path(store_path) / group_key) - sub_array_keys = self._get_zarr_children(root, Path(sub_path), "array", is_v3) + sub_array_keys = self._get_zarr_children( + root, Path(sub_path), "array", is_v3 + ) array_key = sub_array_keys[0] root_path = root_path / str(group_key) / str(array_key) return str(root_path.resolve()), axes_list @@ -252,7 +257,10 @@ def read_metadata(self): self.logger.debug("read_metadata(): Reading metadata...") if self._file_type == FileType.OmeTiff: return self.read_tiff_metadata() - if self._file_type == FileType.OmeZarrV2 or self._file_type == FileType.OmeZarrV3: + if ( + self._file_type == FileType.OmeZarrV2 + or self._file_type == FileType.OmeZarrV3 + ): return self.read_zarr_metadata() def read_image(self, X, Y, Z, C, T): diff --git a/tests/test_read.py b/tests/test_read.py index e4d1683..c10d754 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -344,13 +344,13 @@ def test_sub_resolution_read(self): def test_read_zarr_v3(self): """Testing zarr v3 format (NGFF 0.5) read with zarr backend""" with bfio.BioReader( - TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="zarr" + TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="zarr3" ) as br: get_dims(br) # Verify it's using the zarr backend - self.assertEqual(br._backend_name, "zarr") + self.assertEqual(br._backend_name, "zarr3") # Verify dimensions are read correctly - self.assertEqual(br.shape, (8978, 6510, 1, 1)) + self.assertEqual(br.shape, (8978, 6510)) # Verify we can actually read data data = br[:100, :100, 0, 0] self.assertEqual(data.shape, (100, 100)) @@ -410,7 +410,7 @@ def test_read_zarr_v3(self): # Verify it's using the tensorstore backend self.assertEqual(br._backend_name, "tensorstore") # Verify dimensions are read correctly - self.assertEqual(br.shape, (8978, 6510, 1, 1)) + self.assertEqual(br.shape, (8978, 6510)) # Verify we can actually read data data = br[:100, :100, 0, 0] self.assertEqual(data.shape, (100, 100)) @@ -423,16 +423,16 @@ def test_read_zarr_v3_multi_resolution(self): with bfio.BioReader( TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="tensorstore", - level=0 + level=0, ) as br: self.assertEqual(br._backend_name, "tensorstore") - self.assertEqual(br.shape, (8978, 6510, 1, 1)) + self.assertEqual(br.shape, (8978, 6510)) # Test resolution level 1 with bfio.BioReader( TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr"), backend="tensorstore", - level=1 + level=1, ) as br: self.assertEqual(br._backend_name, "tensorstore") # Level 1 should be downsampled by 2x From 39ec2ba42c8bc1f55a6d6155a45c84916a8ecc81 Mon Sep 17 00:00:00 2001 From: Sameeul Samee Date: Tue, 17 Feb 2026 07:32:03 -0500 Subject: [PATCH 3/3] update tests --- tests/test_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_read.py b/tests/test_read.py index c10d754..a9df0ce 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -436,7 +436,7 @@ def test_read_zarr_v3_multi_resolution(self): ) as br: self.assertEqual(br._backend_name, "tensorstore") # Level 1 should be downsampled by 2x - self.assertEqual(br.shape, (4489, 3255, 1, 1)) + self.assertEqual(br.shape, (4489, 3255)) class TestZarrMetadata(unittest.TestCase):