Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 102 additions & 20 deletions src/bfio/ts_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from bfiocpp import TSReader, TSWriter, Seq, FileType, get_ome_xml
import bfio.base_classes
from bfio.utils import clean_ome_xml_for_known_issues
from bfio.utils import clean_ome_xml_for_known_issues, detect_zarr_format
import zarr


Expand Down Expand Up @@ -54,7 +54,13 @@ def __init__(self, frontend):
)
else:
self._file_path, self._axes_list = self.get_zarr_array_info()
self._file_type = FileType.OmeZarrV2
# Detect zarr format and set appropriate FileType
zarr_format = detect_zarr_format(self.frontend._file_path)
if zarr_format == 3:
self._file_type = FileType.OmeZarrV3
else:
# Default to v2 for v2 format or unknown
self._file_type = FileType.OmeZarrV2

self._rdr = TSReader(self._file_path, self._file_type, self._axes_list)
self.X = self._rdr._X
Expand All @@ -65,7 +71,14 @@ def __init__(self, frontend):
self.data_type = self._rdr._datatype

def _list_zarr_children(self, path, child_type="array"):
"""Filesystem-based fallback for enumerating zarr v2 store children."""
"""Filesystem-based fallback for enumerating zarr v2 store children.

In zarr-python v3, array_keys()/group_keys() are unreliable for v2 stores.
This function checks subdirectories for .zarray or .zgroup marker files.

NOTE: Only use this for v2 stores.
For v3 stores, use root.array_keys()/group_keys().
"""
from pathlib import Path as _Path

p = _Path(path)
Expand All @@ -77,6 +90,31 @@ def _list_zarr_children(self, path, child_type="array"):
children.append(child.name)
return sorted(children)

def _get_zarr_children(self, root, root_path, child_type="array", is_v3=False):
"""Get child arrays or groups using the appropriate method for the zarr format.

For v2: Uses filesystem fallback (_list_zarr_children)
For v3: Uses zarr-python's array_keys()/group_keys() methods

Args:
root: zarr.Group object
root_path: Path to the zarr store
child_type: "array" or "group"
is_v3: True if this is a v3 format store

Returns:
List of child names (sorted)
"""
if is_v3:
# For v3, use zarr-python methods which are reliable
if child_type == "array":
return sorted(list(root.array_keys()))
else:
return sorted(list(root.group_keys()))
else:
# For v2, use filesystem fallback
return self._list_zarr_children(str(root_path), child_type)

def get_zarr_array_info(self):
self.logger.debug(f"Level is {self.frontend.level}")

Expand All @@ -89,22 +127,35 @@ def get_zarr_array_info(self):
root_path = self.frontend._file_path / "data.zarr"
root = zarr.open(root_path.resolve(), mode="r")

# Detect zarr format to choose appropriate enumeration method
zarr_format = detect_zarr_format(root_path)
is_v3 = zarr_format == 3

axes_list = ""
store_path = str(root_path.resolve())
if self.frontend.level is None:
if isinstance(root, zarr.Array):
return str(root_path.resolve()), axes_list
elif isinstance(root, zarr.Group):
# the top level is a group, check if this has any arrays
array_keys = self._list_zarr_children(store_path, "array")
array_keys = self._get_zarr_children(root, root_path, "array", is_v3)
if len(array_keys) > 0:
array_key = array_keys[0]
root_path = root_path / str(array_key)
try:
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
# Try v3 location first (NGFF 0.5)
ome_meta = root.attrs.get("ome", {})
axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes")
if axes_metadata:
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
else:
# Fall back to v2 location
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
except KeyError:
self.logger.warning(
"Unable to find multiscales metadata. Z, C and T "
Expand All @@ -114,22 +165,35 @@ def get_zarr_array_info(self):
return str(root_path.resolve()), axes_list
else:
# need to go one more level
group_keys = self._list_zarr_children(store_path, "group")
group_keys = self._get_zarr_children(
root, root_path, "group", is_v3
)
group_key = group_keys[0]
root = root[group_key]
try:
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
# Try v3 location first (NGFF 0.5)
ome_meta = root.attrs.get("ome", {})
axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes")
if axes_metadata:
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
else:
# Fall back to v2 location
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
except KeyError:
self.logger.warning(
"Unable to find multiscales metadata. Z, C and T "
+ "dimensions might be incorrect."
)

sub_path = str(Path(store_path) / group_key)
sub_array_keys = self._list_zarr_children(sub_path, "array")
sub_array_keys = self._get_zarr_children(
root, Path(sub_path), "array", is_v3
)
array_key = sub_array_keys[0]
root_path = root_path / str(group_key) / str(array_key)
return str(root_path.resolve()), axes_list
Expand All @@ -143,14 +207,23 @@ def get_zarr_array_info(self):
+ "multiple resoulutions."
)
elif isinstance(root, zarr.Group):
array_keys = self._list_zarr_children(store_path, "array")
array_keys = self._get_zarr_children(root, root_path, "array", is_v3)
if len(array_keys) > self.frontend.level:
root_path = root_path / str(self.frontend.level)
try:
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
# Try v3 location first (NGFF 0.5)
ome_meta = root.attrs.get("ome", {})
axes_metadata = ome_meta.get("multiscales", [{}])[0].get("axes")
if axes_metadata:
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
else:
# Fall back to v2 location
axes_metadata = root.attrs["multiscales"][0]["axes"]
axes_list = "".join(
axes["name"].upper() for axes in axes_metadata
)
except KeyError:
self.logger.warning(
"Unable to find multiscales metadata. Z, C and T "
Expand Down Expand Up @@ -184,7 +257,10 @@ def read_metadata(self):
self.logger.debug("read_metadata(): Reading metadata...")
if self._file_type == FileType.OmeTiff:
return self.read_tiff_metadata()
if self._file_type == FileType.OmeZarrV2:
if (
self._file_type == FileType.OmeZarrV2
or self._file_type == FileType.OmeZarrV3
):
return self.read_zarr_metadata()

def read_image(self, X, Y, Z, C, T):
Expand Down Expand Up @@ -344,12 +420,18 @@ def _init_writer(self):
self.frontend.X,
)

# Check if user wants v3 format via frontend option
# For now, default to v2 to maintain backward compatibility
# Future: could add a frontend.zarr_version attribute
file_type = FileType.OmeZarrV2

self._writer = TSWriter(
str(self.frontend._file_path.joinpath("0").resolve()),
shape,
(1, 1, 1, self.frontend._TILE_SIZE, self.frontend._TILE_SIZE),
self.frontend.dtype,
"TCZYX",
file_type,
)

self.write_metadata()
Expand Down
54 changes: 54 additions & 0 deletions tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ome_zarr.utils import download as zarr_download

TEST_IMAGES = {
"ExpD_chicken_embryo_MIP.ome.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0066/ExpD_chicken_embryo_MIP.ome.zarr",
"5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr",
"Plate1-Blue-A-12-Scene-3-P3-F2-03.czi": "https://downloads.openmicroscopy.org/images/Zeiss-CZI/idr0011/Plate1-Blue-A_TS-Stinger/Plate1-Blue-A-12-Scene-3-P3-F2-03.czi",
"0.tif": "https://osf.io/j6aer/download",
Expand Down Expand Up @@ -340,6 +341,22 @@ def test_sub_resolution_read(self):
get_dims(br)
self.assertEqual(br.shape, (1350, 1351, 1, 27))

def test_read_zarr_v3(self):
    """Read a zarr v3 (NGFF 0.5) image through the "zarr3" backend."""
    image_path = TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr")
    with bfio.BioReader(image_path, backend="zarr3") as reader:
        get_dims(reader)
        # The reader must report the backend that was requested.
        self.assertEqual(reader._backend_name, "zarr3")
        # Image dimensions as reported by the reader.
        self.assertEqual(reader.shape, (8978, 6510))
        # Pull a small corner tile to prove pixel data is readable.
        tile = reader[:100, :100, 0, 0]
        self.assertEqual(tile.shape, (100, 100))
        # Pixel type reported by the reader.
        self.assertEqual(reader.dtype, np.uint8)


class TestZarrTSReader(unittest.TestCase):
def test_get_dims(self):
Expand Down Expand Up @@ -384,6 +401,43 @@ def test_sub_resolution_read(self):
get_dims(br)
self.assertEqual(br.shape, (1350, 1351, 1, 27))

def test_read_zarr_v3(self):
    """Read a zarr v3 (NGFF 0.5) image through the tensorstore backend."""
    image_path = TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr")
    with bfio.BioReader(image_path, backend="tensorstore") as reader:
        get_dims(reader)
        # The reader must report the backend that was requested.
        self.assertEqual(reader._backend_name, "tensorstore")
        # Image dimensions as reported by the reader.
        self.assertEqual(reader.shape, (8978, 6510))
        # Pull a small corner tile to prove pixel data is readable.
        tile = reader[:100, :100, 0, 0]
        self.assertEqual(tile.shape, (100, 100))
        # Pixel type reported by the reader.
        self.assertEqual(reader.dtype, np.uint8)

def test_read_zarr_v3_multi_resolution(self):
    """Open two resolution levels of a zarr v3 image with tensorstore."""
    image_path = TEST_DIR.joinpath("ExpD_chicken_embryo_MIP.ome.zarr")
    # Expected shape per resolution level: level 0 is the highest
    # resolution, level 1 is expected at half that size along each axis.
    expected_shapes = (
        (0, (8978, 6510)),
        (1, (4489, 3255)),
    )
    for level, shape in expected_shapes:
        with bfio.BioReader(
            image_path,
            backend="tensorstore",
            level=level,
        ) as reader:
            self.assertEqual(reader._backend_name, "tensorstore")
            self.assertEqual(reader.shape, shape)


class TestZarrMetadata(unittest.TestCase):
def test_set_metadata(self):
Expand Down