From c8be347bd6576de4c1462b772ec1cad8c55e17c2 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 13 Mar 2026 21:57:30 -0400 Subject: [PATCH 1/5] Add skeleton metrics: longest shortest path, branch count, and radii Compute EDT-based radii, longest shortest path distance, and branch count for each skeleton. Write results to a _with_skeletons.csv alongside the input CSV. Add a note that Lee's algorithm has known limitations. --- src/cellmap_analyze/process/skeletonize.py | 105 +++++++++++++++++++-- 1 file changed, 97 insertions(+), 8 deletions(-) diff --git a/src/cellmap_analyze/process/skeletonize.py b/src/cellmap_analyze/process/skeletonize.py index 751ab03..6fc5134 100644 --- a/src/cellmap_analyze/process/skeletonize.py +++ b/src/cellmap_analyze/process/skeletonize.py @@ -1,5 +1,7 @@ import numpy as np import pandas as pd +import networkx as nx +import edt as edt_module from cellmap_analyze.util import dask_util from cellmap_analyze.util.image_data_interface import ImageDataInterface from cellmap_analyze.util.mixins import ComputeConfigMixin @@ -70,6 +72,15 @@ def __init__( logger.info(f"Loaded {len(self.ids)} IDs from {csv_path}") logger.info(f"Output will be written to {output_path}") + @staticmethod + def _empty_metrics(): + return { + "longest_shortest_path_nm": 0.0, + "num_branches": 0, + "radius_mean_nm": np.nan, + "radius_std_nm": np.nan, + } + @staticmethod def calculate_id_skeleton( id_value, @@ -120,7 +131,10 @@ def calculate_id_skeleton( # Check if there's any data if not np.any(data): logger.warning(f"No voxels found for ID {id_value}, skipping") - return + return Skeletonize._empty_metrics() + + # Compute EDT on pre-erosion mask for approximate radii + distance_transform = edt_module.edt(data, anisotropy=tuple(voxel_size)) # Apply erosion if requested if erosion: @@ -147,14 +161,18 @@ def calculate_id_skeleton( simplified_path = f"{output_path}/simplified/{id_value}" empty_skeleton.write_neuroglancer_skeleton(simplified_path) logger.info(f"Wrote empty skeleton for ID {id_value}") - return + return Skeletonize._empty_metrics() - # Skeletonize + # Skeletonize using Lee's algorithm (skimage default). It has + # known limitations (e.g. thin structures may lose branches) but + # is sufficient for now. skel = skeletonize(data) # Check if skeletonization produced anything if not np.any(skel): - logger.warning(f"Skeletonization produced no voxels for ID {id_value}, writing empty skeleton") + logger.warning( + f"Skeletonization produced no voxels for ID {id_value}, writing empty skeleton" + ) # Write empty skeleton files empty_skeleton = CustomSkeleton(vertices=[], edges=[]) full_path = f"{output_path}/full/{id_value}" @@ -162,7 +180,13 @@ def calculate_id_skeleton( simplified_path = f"{output_path}/simplified/{id_value}" empty_skeleton.write_neuroglancer_skeleton(simplified_path) logger.info(f"Wrote empty skeleton for ID {id_value}") - return + return Skeletonize._empty_metrics() + + # Sample radii at skeleton voxel positions + skel_coords = np.argwhere(skel) + radii = distance_transform[ + skel_coords[:, 0], skel_coords[:, 1], skel_coords[:, 2] + ] # Convert to custom skeleton format # spacing parameter scales the vertices by voxel_size @@ -196,6 +220,35 @@ def calculate_id_skeleton( else: pruned = skeleton + # Compute skeleton metrics on pruned skeleton + num_branches = len(pruned.polylines) + longest_shortest_path = 0.0 + if len(pruned.vertices) > 1: + pruned_graph = pruned.skeleton_to_graph() + for component in nx.connected_components(pruned_graph): + if len(component) < 2: + continue + subgraph = pruned_graph.subgraph(component) + start = next(iter(component)) + lengths = nx.single_source_dijkstra_path_length( + subgraph, start, weight="weight" + ) + far_node = max(lengths, key=lengths.get) + lengths2 = nx.single_source_dijkstra_path_length( + subgraph, far_node, weight="weight" + ) + component_diameter = max(lengths2.values()) + longest_shortest_path = max( + longest_shortest_path, component_diameter + ) + + skeleton_metrics = { + "longest_shortest_path_nm": longest_shortest_path, + "num_branches": num_branches, + "radius_mean_nm": float(np.mean(radii)), + "radius_std_nm": float(np.std(radii)), + } + # Simplify if tolerance_nm > 0: simplified = pruned.simplify(tolerance_nm) @@ -214,7 +267,7 @@ def calculate_id_skeleton( simplified_path = f"{output_path}/simplified/{id_value}" empty_skeleton.write_neuroglancer_skeleton(simplified_path) logger.info(f"Wrote empty skeleton for ID {id_value}") - return + return Skeletonize._empty_metrics() # Ensure edges are properly shaped numpy arrays before writing # Handle case where there are no edges (single vertex) @@ -242,6 +295,8 @@ def calculate_id_skeleton( f"Wrote simplified skeleton for ID {id_value}: {len(simplified.vertices)} vertices" ) + return skeleton_metrics + except Exception as e: logger.error(f"Error processing ID {id_value}: {e}", exc_info=True) raise @@ -313,18 +368,48 @@ def skeletonize(self): # Parallelize over IDs using dask num_ids = len(self.ids) + tmp_merge_dir = f"{self.output_path}/_tmp_skeleton_metrics_to_merge" - dask_util.compute_blockwise_partitions( + skeleton_metrics = dask_util.compute_blockwise_partitions( num_ids, self.num_workers, self.compute_args, logger, f"skeletonizing {num_ids} IDs from {self.segmentation_idi.path}", self._skeletonize_id_wrapper, + merge_info=(Skeletonize._merge_skeleton_metrics, tmp_merge_dir), ) + self._write_skeleton_csv(skeleton_metrics) + logger.info("Skeletonization complete") + @staticmethod + def _merge_skeleton_metrics(list_of_results): + merged = [] + for result in list_of_results: + merged.append(result) + return merged + + def _write_skeleton_csv(self, skeleton_metrics): + original_df = pd.read_csv(self.csv_path, index_col=0) + metrics_df = pd.DataFrame(skeleton_metrics) + metrics_df = metrics_df.set_index("id") + metrics_df = metrics_df.rename( + columns={ + "longest_shortest_path_nm": "Longest Shortest Path (nm)", + "num_branches": "Number of Branches", + "radius_mean_nm": "Radius Mean (nm)", + "radius_std_nm": "Radius Std (nm)", + } + ) + combined_df = original_df.join(metrics_df) + csv_dir = os.path.dirname(self.csv_path) + csv_basename = os.path.splitext(os.path.basename(self.csv_path))[0] + output_csv = os.path.join(csv_dir, f"{csv_basename}_with_skeletons.csv") + combined_df.to_csv(output_csv) + logger.info(f"Wrote skeleton metrics CSV to {output_csv}") + def _skeletonize_id_wrapper(self, index): """ Wrapper to call calculate_id_skeleton with the appropriate ID. @@ -333,7 +418,7 @@ def _skeletonize_id_wrapper(self, index): index: Index into self.ids list """ id_value = self.ids[index] - Skeletonize.calculate_id_skeleton( + result = Skeletonize.calculate_id_skeleton( id_value, self.segmentation_idi, self.bbox_df, @@ -342,3 +427,7 @@ def _skeletonize_id_wrapper(self, index): self.min_branch_length_nm, self.tolerance_nm, ) + if result is None: + result = Skeletonize._empty_metrics() + result["id"] = id_value + return result From 0ed6ba2e804fb777d7d04729c325686c85be8257 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 13 Mar 2026 21:57:34 -0400 Subject: [PATCH 2/5] Make cross fixture thick enough for real branches and test skeleton metrics The 2x2 cross-section was too thin for skimage.skeletonize to produce branch points. Use 4x4 arms with distinct lengths and add assertions for branch count, longest shortest path, and radii on ID 5. --- tests/conftest.py | 15 ++++++++------- tests/operations/test_skeletonize.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 530758f..4531a04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -646,13 +646,14 @@ def segmentation_for_skeleton(): ) ** 2 <= radius**2 seg[sphere_mask] = 4 - # ID 5: Cross/plus shape (3D cross with branches in X, Y, Z directions) - # Center bar along Z - seg[35:45, 20:22, 20:22] = 5 - # Branch along X - seg[39:41, 20:22, 15:25] = 5 - # Branch along Y - seg[39:41, 15:25, 20:22] = 5 + # ID 5: Cross/plus shape (3D cross with branches in Z, X, Y directions) + # Thick arms (4x4 cross-section) so skeletonize produces real branches. + # Z arm (top/bottom of junction): z=30..49, 20 voxels long + seg[30:50, 23:27, 23:27] = 5 + # X arm (longest): z=38..41, x=10..39, 30 voxels long + seg[38:42, 23:27, 10:40] = 5 + # Y arm: z=38..41, y=14..35, 22 voxels long + seg[38:42, 14:36, 23:27] = 5 # ID 6: L-shaped structure seg[5:10, 30:35, 30:32] = 6 # Vertical part diff --git a/tests/operations/test_skeletonize.py b/tests/operations/test_skeletonize.py index 665782c..f7f1bfd 100644 --- a/tests/operations/test_skeletonize.py +++ b/tests/operations/test_skeletonize.py @@ -289,6 +289,34 @@ def test_skeletonize_without_erosion(tmp_zarr, tmp_skeletonize_csv): len(full_verts) > 0 ), f"ID {id_val}: No vertices in skeleton without erosion" + # Verify skeleton metrics CSV was written with expected columns + csv_dir = os.path.dirname(tmp_skeletonize_csv) + csv_basename = os.path.splitext(os.path.basename(tmp_skeletonize_csv))[0] + metrics_csv_path = os.path.join(csv_dir, f"{csv_basename}_with_skeletons.csv") + assert os.path.exists(metrics_csv_path), "Skeleton metrics CSV not created" + metrics_df = pd.read_csv(metrics_csv_path, index_col=0) + + # ID 5 (cross shape) should have meaningful skeleton metrics + row5 = metrics_df.loc[5] + + # Cross has 3 arms meeting at a junction -> 3 branches + assert ( + row5["Number of Branches"] >= 3 + ), f"Cross (ID 5) should have >= 3 branches, got {row5['Number of Branches']}" + + # Longest shortest path should span the longest arm pair through the junction + assert ( + row5["Longest Shortest Path (nm)"] > 100 + ), f"Cross (ID 5) longest shortest path too small: {row5['Longest Shortest Path (nm)']}" + + # Radii should be reasonable (arms are 4 voxels wide, voxel_size=8nm -> ~16nm radius) + assert ( + row5["Radius Mean (nm)"] > 10 + ), f"Cross (ID 5) radius mean too small: {row5['Radius Mean (nm)']}" + assert not np.isnan( + row5["Radius Mean (nm)"] + ), "Cross (ID 5) radius mean should not be NaN" + def test_skeletonize_with_pruning_and_simplification(tmp_zarr, tmp_skeletonize_csv): """Test that pruning and simplification reduce the skeleton complexity.""" From 4c293a9ce9e3cff59c82747cc81a3dcaf0e557b8 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 13 Mar 2026 21:57:37 -0400 Subject: [PATCH 3/5] Clean up formatting and remove dead code in morphological operations test --- .../test_morphological_operations.py | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/operations/test_morphological_operations.py b/tests/operations/test_morphological_operations.py index a2920d9..94f0a51 100644 --- a/tests/operations/test_morphological_operations.py +++ b/tests/operations/test_morphological_operations.py @@ -3,19 +3,19 @@ import fastmorph import numpy as np from cellmap_analyze.util.image_data_interface import ( - ImageDataInterface, - ) + ImageDataInterface, +) @pytest.mark.parametrize("operation", ["erosion", "dilation"]) def test_morphological_operations(tmp_zarr, segmentation_cylinders, operation): - num_iterations=1 + num_iterations = 1 mo = MorphologicalOperations( input_path=f"{tmp_zarr}/segmentation_cylinders/s0", output_path=f"{tmp_zarr}/test_morphological_{operation}", num_workers=1, operation=operation, - iterations=num_iterations + iterations=num_iterations, ) mo.perform_morphological_operation() test_data = ImageDataInterface( @@ -24,16 +24,8 @@ def test_morphological_operations(tmp_zarr, segmentation_cylinders, operation): ground_truth = segmentation_cylinders.copy() if operation == "erosion": - ground_truth = fastmorph.erode(ground_truth,iterations=num_iterations) + ground_truth = fastmorph.erode(ground_truth, iterations=num_iterations) else: - ground_truth = fastmorph.dilate(ground_truth,iterations=num_iterations) + ground_truth = fastmorph.dilate(ground_truth, iterations=num_iterations) assert np.array_equal(test_data, ground_truth) -# %% -# import fastmorph -# from cellmap_analyze.util.image_data_interface import ( -# ImageDataInterface, -# ) -# from cellmap_analyze.util.neuroglancer_util import view_in_neuroglancer -# view_in_neuroglancer(original=ImageDataInterface("/tmp/pytest-of-ackermand/pytest-29/tmp0/tmp.zarr/image_with_holes/s0").to_ndarray_ts(),gt=fastmorph.dilate(ImageDataInterface("/tmp/pytest-of-ackermand/pytest-29/tmp0/tmp.zarr/image_with_holes/s0").to_ndarray_ts(),iterations=5),test=ImageDataInterface("/tmp/pytest-of-ackermand/pytest-29/tmp0/tmp.zarr/test_morphological_dilation/s0").to_ndarray_ts()) -# %% From 2f596baaf8b39045f1505c54d8f75df9e4fb5f1a Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 13 Mar 2026 22:00:25 -0400 Subject: [PATCH 4/5] Use exact branch count and approximate longest shortest path in cross test Assert ID 5 has exactly 3 branches and longest shortest path ~160 nm. --- tests/operations/test_skeletonize.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/operations/test_skeletonize.py b/tests/operations/test_skeletonize.py index f7f1bfd..5f26412 100644 --- a/tests/operations/test_skeletonize.py +++ b/tests/operations/test_skeletonize.py @@ -299,15 +299,15 @@ def test_skeletonize_without_erosion(tmp_zarr, tmp_skeletonize_csv): # ID 5 (cross shape) should have meaningful skeleton metrics row5 = metrics_df.loc[5] - # Cross has 3 arms meeting at a junction -> 3 branches + # Cross has 3 arms meeting at a junction -> exactly 3 branches assert ( - row5["Number of Branches"] >= 3 - ), f"Cross (ID 5) should have >= 3 branches, got {row5['Number of Branches']}" + row5["Number of Branches"] == 3 + ), f"Cross (ID 5) should have 3 branches, got {row5['Number of Branches']}" - # Longest shortest path should span the longest arm pair through the junction + # Longest shortest path should be approximately 160 nm assert ( - row5["Longest Shortest Path (nm)"] > 100 - ), f"Cross (ID 5) longest shortest path too small: {row5['Longest Shortest Path (nm)']}" + abs(row5["Longest Shortest Path (nm)"] - 160) < 20 + ), f"Cross (ID 5) longest shortest path should be ~160 nm, got {row5['Longest Shortest Path (nm)']}" # Radii should be reasonable (arms are 4 voxels wide, voxel_size=8nm -> ~16nm radius) assert ( From 82162b608f21f602c93ed330aa7319ba0fc064ba Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Fri, 13 Mar 2026 22:05:02 -0400 Subject: [PATCH 5/5] Assert approximate radius ~16 nm for cross shape in skeleton test --- tests/operations/test_skeletonize.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/operations/test_skeletonize.py b/tests/operations/test_skeletonize.py index 5f26412..f32b5b9 100644 --- a/tests/operations/test_skeletonize.py +++ b/tests/operations/test_skeletonize.py @@ -309,13 +309,10 @@ def test_skeletonize_without_erosion(tmp_zarr, tmp_skeletonize_csv): abs(row5["Longest Shortest Path (nm)"] - 160) < 20 ), f"Cross (ID 5) longest shortest path should be ~160 nm, got {row5['Longest Shortest Path (nm)']}" - # Radii should be reasonable (arms are 4 voxels wide, voxel_size=8nm -> ~16nm radius) + # Radii should be approximately 16 nm (arms are 4 voxels wide, voxel_size=8nm) assert ( - row5["Radius Mean (nm)"] > 10 - ), f"Cross (ID 5) radius mean too small: {row5['Radius Mean (nm)']}" - assert not np.isnan( - row5["Radius Mean (nm)"] - ), "Cross (ID 5) radius mean should not be NaN" + abs(row5["Radius Mean (nm)"] - 16) < 4 + ), f"Cross (ID 5) radius mean should be ~16 nm, got {row5['Radius Mean (nm)']}" def test_skeletonize_with_pruning_and_simplification(tmp_zarr, tmp_skeletonize_csv):