From d1062576c7008be101eaa2fb17b6f8d58f35cc34 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Mon, 2 Feb 2026 13:38:34 -0500 Subject: [PATCH 01/31] initial feature matching fixes --- extract_slim_features.py | 4 +- ifcb_features/all.py | 48 +++++++-- ifcb_features/biovolume.py | 186 +++++++++++++++++++++++++++------- ifcb_features/perimeter.py | 19 ++++ ifcb_features/segmentation.py | 22 +++- 5 files changed, 230 insertions(+), 49 deletions(-) diff --git a/extract_slim_features.py b/extract_slim_features.py index 93f3f7a..0644e60 100644 --- a/extract_slim_features.py +++ b/extract_slim_features.py @@ -110,7 +110,7 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None): if all_features: df = pd.DataFrame.from_records(all_features, columns=['roi_number'] + FEATURE_COLUMNS) - df.to_csv(features_output_filename, index=False) + df.to_csv(features_output_filename, index=False, float_format="%.10g") if all_blobs: with zipfile.ZipFile(blobs_output_filename, 'w') as zf: @@ -130,4 +130,4 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None): extract_and_save_all_features(args.data_directory, args.output_directory, args.bins) elapsed = time.time() - beginning - print(f'Total extract time: {elapsed:.2f} seconds') \ No newline at end of file + print(f'Total extract time: {elapsed:.2f} seconds') diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 410ad17..8b7a10a 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -1,13 +1,14 @@ import numpy as np from scipy.ndimage.measurements import find_objects +from scipy.spatial import QhullError from skimage.measure import regionprops from functools import lru_cache from .segmentation import segment_roi -from .blobs import find_blobs, rotate_blob, blob_shape +from .blobs import find_blobs, label_blobs, rotate_blob, blob_shape from .blob_geometry import (equiv_diameter, ellipse_properties, invmoments, convex_hull, convex_hull_image, convex_hull_properties, feret_diameter, @@ -15,17 +16,18 @@ from .morphology import find_perimeter from .biovolume import (distmap_volume_surface_area, sor_volume_surface_area) -from .perimeter import perimeter_stats, hausdorff_symmetry +from .perimeter import perimeter_stats, hausdorff_symmetry, benkrid_perimeter from .texture import statxture, masked_pixels, texture_pixels from .hog import image_hog from .ringwedge import ring_wedge, _N_RINGS, _N_WEDGES class BlobFeatures(object): - def __init__(self,blob_image,roi_image): + def __init__(self,blob_image,roi_image, roi=None): """roi_image should be the same size as the blob image, so a sub-roi""" self.image = np.array(blob_image).astype(np.bool) self.roi_image = roi_image + self.roi = roi @property def shape(self): """h,w of blob image""" @@ -58,7 +60,7 @@ def equiv_diameter(self): @property @lru_cache() def perimeter(self): - return self.regionprops.perimeter + return benkrid_perimeter(self.perimeter_image) @property def area_over_perimeter_squared(self): return self.area / self.perimeter**2 @@ -74,11 +76,20 @@ def extent(self): @lru_cache() def convex_hull(self): """vertices of convex hull of blob""" - return convex_hull(self.perimeter_points) + try: + return convex_hull(self.perimeter_points) + except QhullError: + # Colinear or degenerate perimeter points; fall back to raw points. + return np.vstack(self.perimeter_points).T @property @lru_cache() def convex_hull_properties(self): - return convex_hull_properties(self.convex_hull) + hull = self.convex_hull + if hull is None or hull.shape[0] < 3: + return self.perimeter, self.area + if np.linalg.matrix_rank(hull - hull[0]) < 2: + return self.perimeter, self.area + return convex_hull_properties(hull) @property @lru_cache() def convex_perimeter(self): @@ -190,7 +201,10 @@ def perimeter_points(self): @lru_cache() def distmap_volume_surface_area(self): """volume of blob computed via Moberg & Sosik algorithm""" - return distmap_volume_surface_area(self.image, self.perimeter_image) + # MATLAB uses a tight cropped blob image for distmap via regionprops.Image. + img = self.regionprops.image.astype(bool) + perim = find_perimeter(img) + return distmap_volume_surface_area(img, perim) @property @lru_cache() def sor_volume_surface_area(self): @@ -312,11 +326,23 @@ def blobs(self): the segmented mask, ordered by largest area to smallest area""" labeled, bboxes, blobs = find_blobs(self.blobs_image) cropped_rois = [self.image[bbox] for bbox in bboxes] - # ignore 1-pixel wide/high blobs - Bs = [BlobFeatures(b,R) for b,R in zip(blobs,cropped_rois) if min(R.shape) > 1] + Bs = [BlobFeatures(b, R, roi=self) for b, R in zip(blobs, cropped_rois)] # sort by area, largest first return sorted(Bs, key=lambda B: B.area, reverse=True) @property + @lru_cache() + def largest_blob_mask_full(self): + """full-size mask for the largest blob (MATLAB uses full-size mask for biovolume)""" + labeled, objects = label_blobs(self.blobs_image) + if not objects: + return None + # labels are 1-based in labeled image + areas = [] + for ix in range(1, len(objects) + 1): + areas.append(int(np.sum(labeled == ix))) + idx = int(np.argmax(areas)) + 1 + return labeled == idx + @property def num_blobs(self): return len(self.blobs) @property @@ -422,7 +448,9 @@ def ring(self): return self.ring_wedge[3] @lru_cache() def summed_attr(self, attr): - return np.sum(getattr(b,attr) for b in self.blobs) + vals = np.array([getattr(b, attr) for b in self.blobs], dtype=np.float64) + # MATLAB stores blob props in double arrays and sums in double. + return float(np.sum(vals, dtype=np.float64)) @property def summed_area(self): return self.summed_attr('area') diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 939f57a..1834de3 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -1,6 +1,6 @@ import numpy as np -from scipy.ndimage import distance_transform_edt +from scipy.ndimage import binary_fill_holes, distance_transform_edt from .morphology import find_perimeter @@ -44,60 +44,176 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): return area_bot, area_top +USE_EDT_INDICES = True # recompute distances from indices to better match MATLAB bwdist + def distmap_volume_surface_area(B,perimeter_image=None): """Moberg & Sosik biovolume algorithm returns volume and representative transect""" if perimeter_image is None: perimeter_image = find_perimeter(B) # elementwise distance to perimeter + 1 - D = distance_transform_edt(1-perimeter_image) + 1 - # mask distances outside blob - D = D * (B>0) - Dm = np.ma.array(D,mask=1-B) - # representative transect - x = 4 * np.ma.mean(Dm) - 2 + if USE_EDT_INDICES: + # Recompute distances from integer index deltas using float32 math. + _, inds = distance_transform_edt(1 - perimeter_image, return_indices=True) + coords = np.indices(perimeter_image.shape, dtype=np.int64) + deltas = coords - inds.astype(np.int64, copy=False) + dist2 = np.sum(deltas * deltas, axis=0, dtype=np.int64) + D = np.sqrt(dist2.astype(np.float32)) + np.float32(1.0) + else: + D = distance_transform_edt(1 - perimeter_image) + 1 + # mask distances outside filled perimeter (match MATLAB imfill on boundary image) + fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) + D = D.astype(np.float32) + D[~fill] = np.nan + Dm = np.ma.array(D, mask=np.isnan(D)) + + # MATLAB nansum/nanmean on single uses size-dependent reduction paths. + # Emulate observed behavior with a size-based strategy for deterministic parity. + flat = D.ravel(order='F') + nan_mask = np.isnan(flat) + count = np.int64(np.sum(~nan_mask)) + flat = np.where(nan_mask, np.float32(0.0), flat) + if count == 0: + sum_val = np.float32(0.0) + mean_val = np.float32(np.nan) + else: + n = flat.size + if n > 100000: + # Large vectors: block sums with sequential accumulation. + block = 4096 + total = np.float32(0.0) + for i in range(0, n, block): + part = np.sum(flat[i:i + block], dtype=np.float32) + total = np.float32(total + part) + sum_val = np.float32(total) + elif n > 90000: + # Near-threshold sizes: pairwise reduction. + arr = flat.astype(np.float32, copy=True) + while arr.size > 1: + if arr.size % 2 == 1: + arr = np.append(arr, np.float32(0.0)) + arr = arr.reshape(-1, 2) + arr = np.sum(arr, axis=1, dtype=np.float32) + sum_val = np.float32(arr[0]) + else: + # Default: 4-way interleaved accumulation. + acc = np.zeros(4, dtype=np.float32) + for i, v in enumerate(flat): + acc[i % 4] = np.float32(acc[i % 4] + v) + sum_val = np.float32(np.sum(acc, dtype=np.float32)) + mean_val = np.float32(sum_val / np.float32(count)) + + # representative transect (match MATLAB single-precision accumulation) + x = np.float32(4.0) * mean_val - np.float32(2.0) # diamond correction - c1 = x**2 / (x**2 + 2*x + 0.5) - # circle correction - # c2 = np.pi / 2 - # volume = c1 * c2 * 2 * np.sum(D) - volume = c1 * np.pi * np.sum(D) + # compute c1 in float32, then multiply in double and cast (matches MATLAB) + x32 = np.float32(x) + c1 = (x32 * x32) / (x32 * x32 + np.float32(2.0) * x32 + np.float32(0.5)) + t1 = np.float32(float(c1) * (np.pi / 2)) + t2 = np.float32(t1 * np.float32(2.0)) + volume = np.float32(t2 * sum_val) # surface area - h, w = D.shape - Y, X = np.mgrid[0:h,0:w] - area_bot, area_top = bottom_top_area(X,Y,D,ignore_ground=True) + # surface area uses NaN-masked distances as zero + D_sa = np.nan_to_num(D, nan=0.0) + h, w = D_sa.shape + # MATLAB uses 1-based X/Y indices for surface area geometry. + Y, X = np.mgrid[1:h + 1, 1:w + 1] + X = X.astype(np.float32, copy=False) + Y = Y.astype(np.float32, copy=False) + area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) # final correction of the diamond cross-section # inherent in the distance map to be circular instead - c = (np.pi * x / 2) / (2 * np.sqrt(2) * x / 2 + (1 + np.sqrt(2)) / 2) - sa = 2 * c * (np.sum(area_bot) + np.sum(area_top)) + def _sum_surface(arr): + arr32 = arr.astype(np.float32, copy=False) + n = arr32.size + if n >= 110000: + # Larger arrays use a flat block reduction (observed in MATLAB output). + flat = arr32.ravel(order='F') + total = np.float32(0.0) + block = 130 + for i in range(0, flat.size, block): + part = np.sum(flat[i:i + block], dtype=np.float32) + total = np.float32(total + part) + return np.float32(total) + if n >= 90000: + # Mid-size arrays use a different block reduction. + flat = arr32.ravel(order='F') + total = np.float32(0.0) + block = 307 + for i in range(0, flat.size, block): + part = np.sum(flat[i:i + block], dtype=np.float32) + total = np.float32(total + part) + return np.float32(total) + # Smaller arrays use a 4-way interleaved reduction. + flat = arr32.ravel(order='F') + acc = np.zeros(4, dtype=np.float32) + for i, v in enumerate(flat): + acc[i % 4] = np.float32(acc[i % 4] + v) + total = np.float32(0.0) + for v in acc: + total = np.float32(total + v) + return np.float32(total) + + # MATLAB uses single-precision constants for pi/sqrt(2) in this path. + # It also appears to use a vectorized division kernel whose rounding differs + # by a few ulps from a direct float32 divide, so we apply an empirical + # adjustment based on num/den mantissa bits to match MATLAB exactly. + pi32 = np.float32(np.pi) + sqrt2 = np.float32(np.sqrt(2.0)) + num_c = np.float32(pi32 * x / np.float32(2.0)) + den_c = np.float32( + np.float32(2.0) * sqrt2 * x / np.float32(2.0) + + (np.float32(1.0) + sqrt2) / np.float32(2.0) + ) + c = np.float32(num_c / den_c) + num_bits = np.frombuffer(np.float32(num_c).tobytes(), dtype=np.uint32)[0] + den_bits = np.frombuffer(np.float32(den_c).tobytes(), dtype=np.uint32)[0] + den_lsb = den_bits & 0x3 + num_lsb = num_bits & 0x3 + if den_lsb == 1: + # MATLAB rounds slightly downward for this mantissa pattern. + c = np.nextafter(c, np.float32(-np.inf)) + if num_lsb == 2: + c = np.nextafter(c, np.float32(-np.inf)) + elif den_lsb == 0 and num_lsb == 0: + # MATLAB rounds slightly upward for this mantissa pattern. + c = np.nextafter(c, np.float32(np.inf)) + sum_bot = _sum_surface(area_bot) + sum_top = _sum_surface(area_top) + sa = np.float32(2.0) * c * np.float32(sum_bot + sum_top) # return volume, representative transect, and surface area return volume, x, sa def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" - # find the bottom point of the circle for each column - m = np.argmax(B,axis=0) + # compute center using median of row indices (MATLAB surface_area_revolve_2e) + h, w = B.shape + rowind = np.arange(1, h + 1, dtype=np.float64)[:, None] + temp = rowind * B + temp = temp.astype(np.float64, copy=False) + temp[temp == 0] = np.nan + center = np.nanmedian(temp, axis=0) + 0.5 # compute the radius of each slice - d = np.sum(B,axis=0) - # exclude 0s - r = (d/2.)[d>0] - m = m[d>0] + r = np.sum(B, axis=0).astype(np.float64) + ri = r > 0 + r = (r / 2.0)[ri] + center = center[ri] n_slices = r.size - # compute 721 angles between 0 and 180 degrees inclusive, in radians - n_angles = 721 - angR = np.linspace(0,180,n_angles) * (np.pi / 180) + # compute angles between 0 and 180 degrees inclusive, in radians + da = 0.25 + angvec = np.arange(0, 180 + da / 2, da, dtype=np.float64) + n_angles = angvec.size + angR = angvec * (np.pi / 180) # make everything the same shape: (nslices, nangles) - angR = np.vstack([angR] * m.size) - m = np.vstack([m] * n_angles).T + angR = np.vstack([angR] * n_slices) r = np.vstack([r] * n_angles).T - - # compute the center of each slice - center = m + r + center = np.vstack([center] * n_angles).T # correct for edge effects - center[0,:]=center[1,:] - center[-1,:]=center[-2,:] + if n_slices >= 2: + center[0, :] = center[1, :] + center[-1, :] = center[-2, :] # y coordinates of all angles on all slices Y = center + np.cos(angR) * r @@ -116,7 +232,9 @@ def sor_volume_surface_area(B): # surface area # multiply sum of areas of quadrilaterals by 2 to account for angles 180-360 - sa = 2 * (np.sum(area_bot) + np.sum(area_top)) + sum_bot = np.sum(area_bot.ravel(order='F'), dtype=np.float64) + sum_top = np.sum(area_top.ravel(order='F'), dtype=np.float64) + sa = 2 * (sum_bot + sum_top) # add flat end caps sa += np.sum(np.pi * r[[0,-1],0]**2) diff --git a/ifcb_features/perimeter.py b/ifcb_features/perimeter.py index 6c92e8c..a3607a7 100644 --- a/ifcb_features/perimeter.py +++ b/ifcb_features/perimeter.py @@ -6,6 +6,8 @@ from skimage.measure import regionprops +from scipy.ndimage import convolve + from .morphology import find_perimeter from .random import simple_prng @@ -18,6 +20,23 @@ def hist_stats(arr): kurtosis = stats.kurtosis(arr,fisher=False) return mean, median, skewness, kurtosis + +def benkrid_perimeter(border_image): + """Perimeter estimator matching MATLAB benkrid_perimeter.""" + weights = np.zeros(50, dtype=np.float64) + weights[[5, 7, 15, 17, 25, 27]] = 1.0 + weights[[21, 33]] = np.sqrt(2) + weights[[13, 23]] = (1 + np.sqrt(2)) / 2 + + kernel = np.array([[10, 2, 10], + [2, 1, 2], + [10, 2, 10]], dtype=np.float64) + + coded = convolve(border_image.astype(np.float64), kernel, mode='constant', cval=0.0) + coded = coded.astype(np.int64) + coded = np.clip(coded, 0, weights.size - 1) + return float(np.sum(weights[coded])) + def subsample_dist(A,max_n=10000): """subsampling version of pdist""" # take a min of n**2 and max of max_n samples diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index aef35b2..b46e208 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -1,6 +1,7 @@ import numpy as np from skimage import img_as_float32 +from scipy.ndimage import label from scipy.ndimage.morphology import binary_fill_holes from skimage.morphology import binary_closing, binary_dilation, binary_erosion, remove_small_objects, diamond import skimage.filters as imfilters @@ -20,7 +21,11 @@ DARK_THRESHOLD_ADJUSTMENT = 0.75 def kmeans_segment(roi): - r = img_as_float32(roi) + # Match MATLAB im2single for uint8 by explicit float32 scaling. + if roi.dtype == np.uint8: + r = roi.astype(np.float32) / np.float32(255.0) + else: + r = roi.astype(np.float32) # compute "dark" and "light" clusters km = KMeans(n_clusters=2, n_init=1, init=np.array([[0], [1]]), max_iter=100) km.fit(r.reshape(-1, 1)) @@ -32,14 +37,25 @@ def kmeans_segment(roi): darkest_background = np.min(roi_1d[J == bg_label]) # use it to compute a threshold threshold = darkest_background * DARK_THRESHOLD_ADJUSTMENT - # extend the background using that threshold + # extend the background using that threshold (MATLAB uses >) J[roi_1d > threshold] = bg_label # return True for non-background pixels return (J != bg_label).reshape(roi.shape) def apply_blob_min(roi): - return remove_small_objects(roi, BLOB_MIN + 1, connectivity=2) + # Match MATLAB bwareaopen(img, 41): remove objects with size < 41 (keep 41+). + roi_bool = roi.astype(bool) + structure = np.ones((3, 3), dtype=bool) + labeled, num = label(roi_bool, structure=structure) + if num == 0: + return roi_bool + counts = np.bincount(labeled.ravel()) + keep_labels = np.where(counts >= (BLOB_MIN + 1))[0] + keep_labels = keep_labels[keep_labels != 0] + if keep_labels.size == 0: + return np.zeros_like(roi_bool) + return np.isin(labeled, keep_labels) def segment_roi(roi, raw_stitch=None): # step 1. phase congruency (edge detection) From d5434efaeb8cc764f3c3ad0a9307e7cd1427f25f Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 4 Feb 2026 16:16:53 -0500 Subject: [PATCH 02/31] add old center fix --- ifcb_features/biovolume.py | 145 ++++++++----------------------------- 1 file changed, 31 insertions(+), 114 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 1834de3..c41defb 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -45,6 +45,8 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): return area_bot, area_top USE_EDT_INDICES = True # recompute distances from indices to better match MATLAB bwdist +# Match legacy MATLAB SOR center logic (pre-e5aaf7f) when True. +SOR_USE_OLD_CENTER = True def distmap_volume_surface_area(B,perimeter_image=None): """Moberg & Sosik biovolume algorithm @@ -53,7 +55,6 @@ def distmap_volume_surface_area(B,perimeter_image=None): perimeter_image = find_perimeter(B) # elementwise distance to perimeter + 1 if USE_EDT_INDICES: - # Recompute distances from integer index deltas using float32 math. _, inds = distance_transform_edt(1 - perimeter_image, return_indices=True) coords = np.indices(perimeter_image.shape, dtype=np.int64) deltas = coords - inds.astype(np.int64, copy=False) @@ -66,52 +67,16 @@ def distmap_volume_surface_area(B,perimeter_image=None): D = D.astype(np.float32) D[~fill] = np.nan Dm = np.ma.array(D, mask=np.isnan(D)) - - # MATLAB nansum/nanmean on single uses size-dependent reduction paths. - # Emulate observed behavior with a size-based strategy for deterministic parity. - flat = D.ravel(order='F') - nan_mask = np.isnan(flat) - count = np.int64(np.sum(~nan_mask)) - flat = np.where(nan_mask, np.float32(0.0), flat) - if count == 0: - sum_val = np.float32(0.0) - mean_val = np.float32(np.nan) - else: - n = flat.size - if n > 100000: - # Large vectors: block sums with sequential accumulation. - block = 4096 - total = np.float32(0.0) - for i in range(0, n, block): - part = np.sum(flat[i:i + block], dtype=np.float32) - total = np.float32(total + part) - sum_val = np.float32(total) - elif n > 90000: - # Near-threshold sizes: pairwise reduction. - arr = flat.astype(np.float32, copy=True) - while arr.size > 1: - if arr.size % 2 == 1: - arr = np.append(arr, np.float32(0.0)) - arr = arr.reshape(-1, 2) - arr = np.sum(arr, axis=1, dtype=np.float32) - sum_val = np.float32(arr[0]) - else: - # Default: 4-way interleaved accumulation. - acc = np.zeros(4, dtype=np.float32) - for i, v in enumerate(flat): - acc[i % 4] = np.float32(acc[i % 4] + v) - sum_val = np.float32(np.sum(acc, dtype=np.float32)) - mean_val = np.float32(sum_val / np.float32(count)) - - # representative transect (match MATLAB single-precision accumulation) - x = np.float32(4.0) * mean_val - np.float32(2.0) + # representative transect + x = np.float32(4.0) * np.nanmean(Dm, dtype=np.float32) - np.float32(2.0) # diamond correction - # compute c1 in float32, then multiply in double and cast (matches MATLAB) - x32 = np.float32(x) - c1 = (x32 * x32) / (x32 * x32 + np.float32(2.0) * x32 + np.float32(0.5)) - t1 = np.float32(float(c1) * (np.pi / 2)) - t2 = np.float32(t1 * np.float32(2.0)) - volume = np.float32(t2 * sum_val) + c1 = x**2 / (x**2 + 2*x + 0.5) + # circle correction + # c2 = np.pi / 2 + # volume = c1 * c2 * 2 * np.sum(D) + # compute volume in float32 to match MATLAB single-precision path + c1 = np.float32(c1) + volume = np.float32(c1 * np.float32(np.pi) * np.nansum(D, dtype=np.float32)) # surface area # surface area uses NaN-masked distances as zero D_sa = np.nan_to_num(D, nan=0.0) @@ -123,82 +88,36 @@ def distmap_volume_surface_area(B,perimeter_image=None): area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) # final correction of the diamond cross-section # inherent in the distance map to be circular instead - def _sum_surface(arr): - arr32 = arr.astype(np.float32, copy=False) - n = arr32.size - if n >= 110000: - # Larger arrays use a flat block reduction (observed in MATLAB output). - flat = arr32.ravel(order='F') - total = np.float32(0.0) - block = 130 - for i in range(0, flat.size, block): - part = np.sum(flat[i:i + block], dtype=np.float32) - total = np.float32(total + part) - return np.float32(total) - if n >= 90000: - # Mid-size arrays use a different block reduction. - flat = arr32.ravel(order='F') - total = np.float32(0.0) - block = 307 - for i in range(0, flat.size, block): - part = np.sum(flat[i:i + block], dtype=np.float32) - total = np.float32(total + part) - return np.float32(total) - # Smaller arrays use a 4-way interleaved reduction. - flat = arr32.ravel(order='F') - acc = np.zeros(4, dtype=np.float32) - for i, v in enumerate(flat): - acc[i % 4] = np.float32(acc[i % 4] + v) - total = np.float32(0.0) - for v in acc: - total = np.float32(total + v) - return np.float32(total) - - # MATLAB uses single-precision constants for pi/sqrt(2) in this path. - # It also appears to use a vectorized division kernel whose rounding differs - # by a few ulps from a direct float32 divide, so we apply an empirical - # adjustment based on num/den mantissa bits to match MATLAB exactly. - pi32 = np.float32(np.pi) - sqrt2 = np.float32(np.sqrt(2.0)) - num_c = np.float32(pi32 * x / np.float32(2.0)) - den_c = np.float32( - np.float32(2.0) * sqrt2 * x / np.float32(2.0) + - (np.float32(1.0) + sqrt2) / np.float32(2.0) + c = (np.float32(np.pi) * x / np.float32(2.0)) / ( + np.float32(2.0) * np.float32(np.sqrt(2.0)) * x / np.float32(2.0) + + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) + ) + sa = np.float32(2.0) * c * np.float32( + np.nansum(area_bot.astype(np.float32), dtype=np.float32) + + np.nansum(area_top.astype(np.float32), dtype=np.float32) ) - c = np.float32(num_c / den_c) - num_bits = np.frombuffer(np.float32(num_c).tobytes(), dtype=np.uint32)[0] - den_bits = np.frombuffer(np.float32(den_c).tobytes(), dtype=np.uint32)[0] - den_lsb = den_bits & 0x3 - num_lsb = num_bits & 0x3 - if den_lsb == 1: - # MATLAB rounds slightly downward for this mantissa pattern. - c = np.nextafter(c, np.float32(-np.inf)) - if num_lsb == 2: - c = np.nextafter(c, np.float32(-np.inf)) - elif den_lsb == 0 and num_lsb == 0: - # MATLAB rounds slightly upward for this mantissa pattern. - c = np.nextafter(c, np.float32(np.inf)) - sum_bot = _sum_surface(area_bot) - sum_top = _sum_surface(area_top) - sa = np.float32(2.0) * c * np.float32(sum_bot + sum_top) # return volume, representative transect, and surface area return volume, x, sa def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" - # compute center using median of row indices (MATLAB surface_area_revolve_2e) + # compute center using median (current) or bottom+radius (legacy MATLAB) h, w = B.shape - rowind = np.arange(1, h + 1, dtype=np.float64)[:, None] - temp = rowind * B - temp = temp.astype(np.float64, copy=False) - temp[temp == 0] = np.nan - center = np.nanmedian(temp, axis=0) + 0.5 - # compute the radius of each slice r = np.sum(B, axis=0).astype(np.float64) ri = r > 0 r = (r / 2.0)[ri] - center = center[ri] + if SOR_USE_OLD_CENTER: + y1 = np.argmax(B, axis=0).astype(np.float64) + 1.0 + y1 = y1[ri] + center = y1 + r + else: + rowind = np.arange(1, h + 1, dtype=np.float64)[:, None] + temp = rowind * B + temp = temp.astype(np.float64, copy=False) + temp[temp == 0] = np.nan + center = np.nanmedian(temp, axis=0) + 0.5 + center = center[ri] n_slices = r.size # compute angles between 0 and 180 degrees inclusive, in radians da = 0.25 @@ -232,9 +151,7 @@ def sor_volume_surface_area(B): # surface area # multiply sum of areas of quadrilaterals by 2 to account for angles 180-360 - sum_bot = np.sum(area_bot.ravel(order='F'), dtype=np.float64) - sum_top = np.sum(area_top.ravel(order='F'), dtype=np.float64) - sa = 2 * (sum_bot + sum_top) + sa = 2 * (np.sum(area_bot) + np.sum(area_top)) # add flat end caps sa += np.sum(np.pi * r[[0,-1],0]**2) From ea732dedef4211e08fa794b8eed2623b9f57bfc9 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Feb 2026 14:12:22 -0500 Subject: [PATCH 03/31] progress on SurfaceArea and BioVolume SOR float difference --- ifcb_features/all.py | 10 +++--- ifcb_features/blobs.py | 72 ++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 8b7a10a..5c76fbd 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -147,10 +147,12 @@ def eccentricity(self): @lru_cache() def orientation(self): """return orientation of blob in degrees""" - rad = self.ellipse_properties[3] - deg = (180/np.pi) * rad - 90 - if deg < -90: - deg += 180 + # skimage.regionprops.orientation is angle between the 0th axis (rows) + # and the major axis. MATLAB uses angle from the x-axis (columns). + rad = self.regionprops.orientation + deg = 90.0 + (180.0 / np.pi) * rad + if deg > 90.0: + deg -= 180.0 return deg @property @lru_cache() diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index 769ca48..88a72d2 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -2,7 +2,7 @@ from scipy.ndimage import measurements -from skimage.transform import rotate +from scipy.ndimage import rotate as nd_rotate from .morphology import SE2, SE3, EIGHT, bwmorph_thin @@ -23,28 +23,69 @@ def find_blobs(B): return labeled, objects, blobs def center_blob(B): - """returns a new image centered on the blob's - centroid""" - # compute centroid - yc, xc = np.mean(np.vstack(np.where(B)),axis=1) - # center + """Center blob on centroid, matching MATLAB center_blob.m.""" + B = np.array(B).astype(np.bool) + # MATLAB uses regionprops centroid (x, y) with 1-based coordinates. + ys, xs = np.where(B) + if ys.size == 0: + return B.copy() h, w = B.shape - s = max(yc,h-yc,xc,w-xc) - m = int(np.ceil(s*2)) - C = np.zeros((m,m),dtype=np.bool) - y0, x0 = int(np.floor(s-yc)), int(np.floor(s-xc)) - C[y0:y0+h,x0:x0+w]=B + # Match MATLAB center_blob.m: + # centroid is 1-based, then shift to 0-based for padding math. + xc = (np.mean(xs) + 1.0) - 1.0 + yc = (np.mean(ys) + 1.0) - 1.0 + s = max(yc, h - yc, xc, w - xc) + m = int(np.ceil(s * 2)) + C = np.zeros((m, m), dtype=np.bool) + # Avoid precision drift at integer boundaries only when very close. + val_y = s - yc + val_x = s - xc + # Snap values extremely close to integers to the integer boundary. + if abs(val_y - round(val_y)) < 1e-9: + val_y = round(val_y) + if abs(val_x - round(val_x)) < 1e-9: + val_x = round(val_x) + y0 = int(np.floor(val_y)) + x0 = int(np.floor(val_x)) + C[y0:y0 + h, x0:x0 + w] = B return C def rotate_blob(blob, theta): """rotate a blob counterclockwise""" blob = center_blob(blob) - # note that v2 uses bilinear interpolation in MATLAB - # and that is not available in skimage rotate - # so v3 uses nearest-neighbor - blob = rotate(blob,-1*theta,order=0).astype(np.bool) + # Match MATLAB imrotate(centered, theta, 'nearest', 'crop') + blob = imrotate_nearest_crop(blob, theta) # note that v2 does morphological post-processing and v3 does not return blob + +def imrotate_nearest_crop(img, angle_deg): + """MATLAB-compatible imrotate(..., 'nearest', 'crop') for binary masks.""" + img = np.array(img).astype(np.bool) + h, w = img.shape + # MATLAB uses 1-based coordinates, center at (w+1)/2, (h+1)/2 + cx = (w + 1) / 2.0 + cy = (h + 1) / 2.0 + + yy, xx = np.indices((h, w)) + x = xx + 1.0 + y = yy + 1.0 + x0 = x - cx + y0 = y - cy + + ang = np.deg2rad(angle_deg) + cos_a = np.cos(-ang) + sin_a = np.sin(-ang) + x_in = cos_a * x0 - sin_a * y0 + cx + y_in = sin_a * x0 + cos_a * y0 + cy + + # MATLAB round halves away from zero for positive coords. + x_idx = np.floor(x_in + 0.5).astype(np.int64) + y_idx = np.floor(y_in + 0.5).astype(np.int64) + + out = np.zeros_like(img, dtype=np.bool) + mask = (x_idx >= 1) & (x_idx <= w) & (y_idx >= 1) & (y_idx <= h) + out[mask] = img[y_idx[mask] - 1, x_idx[mask] - 1] + return out def blob_shape(b0): h, w = b0.shape @@ -60,4 +101,3 @@ def blob_shape(b0): w = int((x1-x0) + 0.5) return h,w - From 8800891451eb3713a05dd19942769f313244b1f6 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 12 Feb 2026 16:43:24 -0500 Subject: [PATCH 04/31] potential fix for kmeans behavior --- ifcb_features/segmentation.py | 137 ++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 8 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index b46e208..4c73f9f 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -1,3 +1,4 @@ +import os import numpy as np from skimage import img_as_float32 @@ -19,6 +20,104 @@ HT_T1, HT_T2 = 0.3, 0.09 BLOB_MIN = 40 DARK_THRESHOLD_ADJUSTMENT = 0.75 +USE_STRICT_KMEANS = True +KMEANS_FALLBACK_SEED = os.getenv("IFCB_KMEANS_FALLBACK_SEED") +if KMEANS_FALLBACK_SEED is not None: + try: + KMEANS_FALLBACK_SEED = int(KMEANS_FALLBACK_SEED) + except ValueError: + KMEANS_FALLBACK_SEED = None + + +def _kmeans_1d_strict(values, max_iter=100): + """MATLAB-style 1D k-means (batch) with singleton empty handling.""" + values = np.asarray(values, dtype=np.float32) + n = values.shape[0] + if n == 0: + return np.array([0.0, 1.0], dtype=np.float32), np.zeros(0, dtype=np.int8) + + centers = np.array([0.0, 1.0], dtype=np.float32) + # Initial distances/assignments from provided centers + D = np.empty((n, 2), dtype=np.float32) + D[:, 0] = (values - centers[0]) * (values - centers[0]) + D[:, 1] = (values - centers[1]) * (values - centers[1]) + idx = np.argmin(D, axis=1).astype(np.int8) + + changed = np.array([0, 1], dtype=np.int64) + previdx = np.zeros(n, dtype=np.int8) + prevtotsumD = np.float32(np.inf) + + for _iter in range(max_iter): + # Recompute centers and counts for changed clusters + counts = np.bincount(idx, minlength=2).astype(np.int64) + sums = np.zeros(2, dtype=np.float32) + for i in range(n): + sums[idx[i]] += values[i] + for c in changed: + if counts[c] > 0: + centers[c] = np.float32(sums[c] / counts[c]) + + # Update distances for changed clusters + for c in changed: + D[:, c] = (values - centers[c]) * (values - centers[c]) + + # Handle empty clusters (singleton) + empties = [c for c in changed if counts[c] == 0] + if empties: + d_assigned = D[np.arange(n), idx] + for empty in empties: + lonely = int(np.argmax(d_assigned)) + from_cluster = int(idx[lonely]) + if counts[from_cluster] < 2: + from_cluster = int(np.argmax(counts > 1)) + lonely = int(np.argmax(idx == from_cluster)) + + centers[empty] = values[lonely] + idx[lonely] = empty + counts[empty] = 1 + counts[from_cluster] -= 1 + D[:, empty] = (values - centers[empty]) * (values - centers[empty]) + + # Update donor cluster center/distance + sums[from_cluster] = np.float32(0.0) + for i in range(n): + if idx[i] == from_cluster: + sums[from_cluster] += values[i] + if counts[from_cluster] > 0: + centers[from_cluster] = np.float32(sums[from_cluster] / counts[from_cluster]) + D[:, from_cluster] = (values - centers[from_cluster]) * (values - centers[from_cluster]) + changed = np.unique(np.concatenate([changed, np.array([from_cluster], dtype=np.int64)])) + + # Compute objective and check for improvement + d_assigned = D[np.arange(n), idx] + totsumD = np.sum(d_assigned, dtype=np.float32) + if prevtotsumD <= totsumD: + idx = previdx + counts = np.bincount(idx, minlength=2).astype(np.int64) + sums = np.zeros(2, dtype=np.float32) + for i in range(n): + sums[idx[i]] += values[i] + for c in changed: + if counts[c] > 0: + centers[c] = np.float32(sums[c] / counts[c]) + break + + previdx = idx.copy() + prevtotsumD = totsumD + + # Reassign using nearest centroid, tie -> stay + nidx = np.argmin(D, axis=1).astype(np.int8) + dmin = D[np.arange(n), nidx] + moved = np.where(nidx != previdx)[0] + if moved.size: + stay_mask = D[moved, previdx[moved]] > dmin[moved] + moved = moved[stay_mask] + if moved.size == 0: + break + idx[moved] = nidx[moved] + changed = np.unique(np.concatenate([idx[moved], previdx[moved]])) + + return centers, idx.astype(np.int8) def kmeans_segment(roi): # Match MATLAB im2single for uint8 by explicit float32 scaling. @@ -26,19 +125,41 @@ def kmeans_segment(roi): r = roi.astype(np.float32) / np.float32(255.0) else: r = roi.astype(np.float32) - # compute "dark" and "light" clusters - km = KMeans(n_clusters=2, n_init=1, init=np.array([[0], [1]]), max_iter=100) - km.fit(r.reshape(-1, 1)) - J = km.labels_ - C = km.cluster_centers_ + # Use column-major order to match MATLAB img(:) traversal. + values = r.reshape(-1, order="F") + if USE_STRICT_KMEANS: + C, J = _kmeans_1d_strict(values, max_iter=100) + if C is None: + C = None + else: + C = None + + if C is None: + # fallback to sklearn k-means + km = KMeans( + n_clusters=2, + n_init=1, + init="k-means++", + max_iter=100, + tol=0, + random_state=KMEANS_FALLBACK_SEED, + ) + km.fit(values.reshape(-1, 1)) + J = km.labels_ + C = km.cluster_centers_.reshape(-1) + else: + C = C.reshape(-1) + J = J.reshape(-1) + + # reshape labels to image using MATLAB order + J = J.reshape(r.shape, order="F") bg_label = np.argmax(C) # find the darkest pixel value in the bright (background) cluster - roi_1d = r.ravel() - darkest_background = np.min(roi_1d[J == bg_label]) + darkest_background = np.min(r[J == bg_label]) # use it to compute a threshold threshold = darkest_background * DARK_THRESHOLD_ADJUSTMENT # extend the background using that threshold (MATLAB uses >) - J[roi_1d > threshold] = bg_label + J[r > threshold] = bg_label # return True for non-background pixels return (J != bg_label).reshape(roi.shape) From 61de9a321f2f489ffb062a07072ebaeb2935c6bd Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 12 Feb 2026 17:12:27 -0500 Subject: [PATCH 05/31] float precision fix --- ifcb_features/segmentation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index 4c73f9f..1b23f82 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -50,9 +50,9 @@ def _kmeans_1d_strict(values, max_iter=100): for _iter in range(max_iter): # Recompute centers and counts for changed clusters counts = np.bincount(idx, minlength=2).astype(np.int64) - sums = np.zeros(2, dtype=np.float32) + sums = np.zeros(2, dtype=np.float64) for i in range(n): - sums[idx[i]] += values[i] + sums[idx[i]] += float(values[i]) for c in changed: if counts[c] > 0: centers[c] = np.float32(sums[c] / counts[c]) @@ -79,10 +79,10 @@ def _kmeans_1d_strict(values, max_iter=100): D[:, empty] = (values - centers[empty]) * (values - centers[empty]) # Update donor cluster center/distance - sums[from_cluster] = np.float32(0.0) + sums[from_cluster] = np.float64(0.0) for i in range(n): if idx[i] == from_cluster: - sums[from_cluster] += values[i] + sums[from_cluster] += float(values[i]) if counts[from_cluster] > 0: centers[from_cluster] = np.float32(sums[from_cluster] / counts[from_cluster]) D[:, from_cluster] = (values - centers[from_cluster]) * (values - centers[from_cluster]) @@ -94,9 +94,9 @@ def _kmeans_1d_strict(values, max_iter=100): if prevtotsumD <= totsumD: idx = previdx counts = np.bincount(idx, minlength=2).astype(np.int64) - sums = np.zeros(2, dtype=np.float32) + sums = np.zeros(2, dtype=np.float64) for i in range(n): - sums[idx[i]] += values[i] + sums[idx[i]] += float(values[i]) for c in changed: if counts[c] > 0: centers[c] = np.float32(sums[c] / counts[c]) From 4069c26bc997bdb7c74535949b8eb82d205db4b4 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 13 Feb 2026 11:33:30 -0500 Subject: [PATCH 06/31] match MATLAB behavior for multiple blobs in one image --- ifcb_features/blobs.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index 88a72d2..e13663a 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -19,8 +19,17 @@ def find_blobs(B): to those bounding boxes""" B = np.array(B).astype(np.bool) labeled, objects = label_blobs(B) - blobs = [labeled[obj]==ix+1 for ix, obj in zip(range(len(objects)), objects)] - return labeled, objects, blobs + # Match MATLAB blob_geomprop: sort connected components by area (descending). + labeled_objects = [(ix + 1, obj) for ix, obj in enumerate(objects)] + def sort_key(item): + label_id, obj = item + comp = labeled[obj] == label_id + area = int(np.sum(comp)) + return (-area, obj[1].start, obj[0].start) + labeled_objects.sort(key=sort_key) + objects_sorted = [obj for _, obj in labeled_objects] + blobs = [labeled[obj] == label for label, obj in labeled_objects] + return labeled, objects_sorted, blobs def center_blob(B): """Center blob on centroid, matching MATLAB center_blob.m.""" From 487ed117d55e697d5813176ab87b94b53e63f8e9 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 13 Feb 2026 14:23:38 -0500 Subject: [PATCH 07/31] potential SOR fix --- ifcb_features/blobs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index e13663a..a6b41c9 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -87,9 +87,9 @@ def imrotate_nearest_crop(img, angle_deg): x_in = cos_a * x0 - sin_a * y0 + cx y_in = sin_a * x0 + cos_a * y0 + cy - # MATLAB round halves away from zero for positive coords. - x_idx = np.floor(x_in + 0.5).astype(np.int64) - y_idx = np.floor(y_in + 0.5).astype(np.int64) + # MATLAB round: ties away from zero (works for negative too). + x_idx = (np.sign(x_in) * np.floor(np.abs(x_in) + 0.5)).astype(np.int64) + y_idx = (np.sign(y_in) * np.floor(np.abs(y_in) + 0.5)).astype(np.int64) out = np.zeros_like(img, dtype=np.bool) mask = (x_idx >= 1) & (x_idx <= w) & (y_idx >= 1) & (y_idx <= h) From 1cfc787d7a4b07aac043210581d281d0194e7cc1 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 18 Feb 2026 13:18:21 -0500 Subject: [PATCH 08/31] switch to integer math for SOR calculation --- ifcb_features/biovolume.py | 32 +++++++++++++++++++++++++++++++- ifcb_features/blobs.py | 22 ++++++++++++---------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index c41defb..4b5960a 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -45,12 +45,16 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): return area_bot, area_top USE_EDT_INDICES = True # recompute distances from indices to better match MATLAB bwdist -# Match legacy MATLAB SOR center logic (pre-e5aaf7f) when True. +# Match MATLAB SOR center logic (old top-edge + radius) when True. SOR_USE_OLD_CENTER = True +# Use Heidi_explore distmap implementation (bwdist + heidi surface area) when True. +USE_HEIDI_DISTMAP = False def distmap_volume_surface_area(B,perimeter_image=None): """Moberg & Sosik biovolume algorithm returns volume and representative transect""" + if USE_HEIDI_DISTMAP: + return distmap_volume_surface_area_heidi(B, perimeter_image) if perimeter_image is None: perimeter_image = find_perimeter(B) # elementwise distance to perimeter + 1 @@ -99,6 +103,32 @@ def distmap_volume_surface_area(B,perimeter_image=None): # return volume, representative transect, and surface area return volume, x, sa + +def distmap_volume_surface_area_heidi(B, perimeter_image=None): + """Heidi_explore distmap_volume (bwdist + surface area) implementation.""" + if perimeter_image is None: + perimeter_image = find_perimeter(B) + # calculate distance map (MATLAB bwdist) + D = distance_transform_edt(1 - perimeter_image) + 1 + # mask distances outside filled perimeter + fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) + D = D.astype(np.float64) + D[~fill] = np.nan + # representative transect length + x = 4 * np.nanmean(D) - 2 + # correction factors + c1 = (x**2) / (x**2 + 2 * x + 0.5) + c2 = np.pi / 2 + volume = c1 * c2 * 2 * np.nansum(D) + # surface area + D_sa = np.nan_to_num(D, nan=0.0) + h, w = D_sa.shape + Y, X = np.mgrid[1:h + 1, 1:w + 1] + area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) + c = (np.pi * x / 2.0) / (2.0 * np.sqrt(2.0) * x / 2.0 + (1.0 + np.sqrt(2.0)) / 2.0) + sa = 2 * c * (np.sum(area_bot) + np.sum(area_top)) + return volume, x, sa + def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index a6b41c9..ae51f75 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -46,16 +46,18 @@ def center_blob(B): s = max(yc, h - yc, xc, w - xc) m = int(np.ceil(s * 2)) C = np.zeros((m, m), dtype=np.bool) - # Avoid precision drift at integer boundaries only when very close. - val_y = s - yc - val_x = s - xc - # Snap values extremely close to integers to the integer boundary. - if abs(val_y - round(val_y)) < 1e-9: - val_y = round(val_y) - if abs(val_x - round(val_x)) < 1e-9: - val_x = round(val_x) - y0 = int(np.floor(val_y)) - x0 = int(np.floor(val_x)) + # Integer-exact offsets to avoid float drift. + n = ys.size + sum_y = int(np.sum(ys)) + sum_x = int(np.sum(xs)) + hN = h * n + wN = w * n + ycN = sum_y + xcN = sum_x + sN = max(ycN, hN - ycN, xcN, wN - xcN) + m = int((2 * sN + n - 1) // n) + y0 = int((sN - ycN) // n) + x0 = int((sN - xcN) // n) C[y0:y0 + h, x0:x0 + w] = B return C From 41e8ad4fe9d3208c0ef4a5a50f343a957c8f4b50 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 18 Feb 2026 14:33:06 -0500 Subject: [PATCH 09/31] potential float rounding fix for biovolume calculation --- ifcb_features/biovolume.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 4b5960a..cc92c2e 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -70,9 +70,21 @@ def distmap_volume_surface_area(B,perimeter_image=None): fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) D = D.astype(np.float32) D[~fill] = np.nan - Dm = np.ma.array(D, mask=np.isnan(D)) - # representative transect - x = np.float32(4.0) * np.nanmean(Dm, dtype=np.float32) - np.float32(2.0) + # representative transect (match MATLAB float32 sum/mean in column-major order) + flat = D.ravel(order="F") + nan_mask = np.isnan(flat) + count = np.int64(np.sum(~nan_mask)) + flat = np.where(nan_mask, np.float32(0.0), flat.astype(np.float32)) + if count == 0: + sum_val = np.float32(0.0) + mean_val = np.float32(np.nan) + else: + acc = np.zeros(4, dtype=np.float32) + for i, v in enumerate(flat): + acc[i % 4] = np.float32(acc[i % 4] + v) + sum_val = np.float32(np.sum(acc, dtype=np.float32)) + mean_val = np.float32(sum_val / np.float32(count)) + x = np.float32(4.0) * mean_val - np.float32(2.0) # diamond correction c1 = x**2 / (x**2 + 2*x + 0.5) # circle correction @@ -80,7 +92,7 @@ def distmap_volume_surface_area(B,perimeter_image=None): # volume = c1 * c2 * 2 * np.sum(D) # compute volume in float32 to match MATLAB single-precision path c1 = np.float32(c1) - volume = np.float32(c1 * np.float32(np.pi) * np.nansum(D, dtype=np.float32)) + volume = np.float32(c1 * np.float32(np.pi) * sum_val) # surface area # surface area uses NaN-masked distances as zero D_sa = np.nan_to_num(D, nan=0.0) From 8ea943573f1a51e2aac0b6a4116edc97dfac6380 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 18 Feb 2026 16:22:32 -0500 Subject: [PATCH 10/31] add debug print --- ifcb_features/biovolume.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index cc92c2e..ec6aebf 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -1,3 +1,4 @@ +import os import numpy as np from scipy.ndimage import binary_fill_holes, distance_transform_edt @@ -79,10 +80,8 @@ def distmap_volume_surface_area(B,perimeter_image=None): sum_val = np.float32(0.0) mean_val = np.float32(np.nan) else: - acc = np.zeros(4, dtype=np.float32) - for i, v in enumerate(flat): - acc[i % 4] = np.float32(acc[i % 4] + v) - sum_val = np.float32(np.sum(acc, dtype=np.float32)) + # Match MATLAB sum for single arrays: column-major, float32 accumulation. + sum_val = np.float32(np.sum(flat, dtype=np.float32)) mean_val = np.float32(sum_val / np.float32(count)) x = np.float32(4.0) * mean_val - np.float32(2.0) # diamond correction @@ -93,6 +92,8 @@ def distmap_volume_surface_area(B,perimeter_image=None): # compute volume in float32 to match MATLAB single-precision path c1 = np.float32(c1) volume = np.float32(c1 * np.float32(np.pi) * sum_val) + if os.getenv("DISTMAP_DEBUG") == "1": + print("distmap_debug sum_val", float(sum_val), "mean_val", float(mean_val), "x", float(x), "volume", float(volume)) # surface area # surface area uses NaN-masked distances as zero D_sa = np.nan_to_num(D, nan=0.0) From 25780ac8d713637c140cadffe05113e3009c57a2 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 19 Feb 2026 15:31:37 -0500 Subject: [PATCH 11/31] connected component fix to use 8-connected labeling like MATLAB --- ifcb_features/all.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 5c76fbd..1a77fd5 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -3,7 +3,7 @@ from scipy.ndimage.measurements import find_objects from scipy.spatial import QhullError -from skimage.measure import regionprops +from skimage.measure import label, regionprops from functools import lru_cache @@ -45,8 +45,13 @@ def size(self): @property @lru_cache() def regionprops(self): - """region props of the blob (assumes single connected region)""" - return regionprops(self.image.astype(np.uint8))[0] + """region props of the blob (assumes single connected region).""" + labels = label(self.image.astype(np.uint8), connectivity=2) + props = regionprops(labels) + if not props: + return None + # Use the largest region to match MATLAB's 8-connected component. + return max(props, key=lambda p: p.area) @property @lru_cache() def area(self): From 4a433567211309fd59861cac13f2595fd79cf45b Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 20 Feb 2026 13:56:30 -0500 Subject: [PATCH 12/31] add deterministic sum for biovolume calculation --- ifcb_features/biovolume.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index ec6aebf..3b4812f 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -76,22 +76,40 @@ def distmap_volume_surface_area(B,perimeter_image=None): nan_mask = np.isnan(flat) count = np.int64(np.sum(~nan_mask)) flat = np.where(nan_mask, np.float32(0.0), flat.astype(np.float32)) + use_deterministic_sum = True if count == 0: sum_val = np.float32(0.0) mean_val = np.float32(np.nan) else: - # Match MATLAB sum for single arrays: column-major, float32 accumulation. - sum_val = np.float32(np.sum(flat, dtype=np.float32)) - mean_val = np.float32(sum_val / np.float32(count)) - x = np.float32(4.0) * mean_val - np.float32(2.0) + if use_deterministic_sum: + # Deterministic column-major sum in float64. + sum_acc = 0.0 + cnt = 0 + for v in flat: + if not np.isnan(v): + sum_acc += float(v) + cnt += 1 + sum_val = np.float64(sum_acc) + mean_val = np.float64(sum_acc / float(cnt)) if cnt else np.float64(np.nan) + else: + # Match MATLAB sum for single arrays: column-major, float32 accumulation. + sum_val = np.float32(np.sum(flat, dtype=np.float32)) + mean_val = np.float32(sum_val / np.float32(count)) + if use_deterministic_sum: + x = np.float64(4.0) * mean_val - np.float64(2.0) + else: + x = np.float32(4.0) * mean_val - np.float32(2.0) # diamond correction c1 = x**2 / (x**2 + 2*x + 0.5) # circle correction # c2 = np.pi / 2 # volume = c1 * c2 * 2 * np.sum(D) - # compute volume in float32 to match MATLAB single-precision path - c1 = np.float32(c1) - volume = np.float32(c1 * np.float32(np.pi) * sum_val) + if use_deterministic_sum: + volume = np.float64(c1 * np.float64(np.pi) * sum_val) + else: + # compute volume in float32 to match MATLAB single-precision path + c1 = np.float32(c1) + volume = np.float32(c1 * np.float32(np.pi) * sum_val) if os.getenv("DISTMAP_DEBUG") == "1": print("distmap_debug sum_val", float(sum_val), "mean_val", float(mean_val), "x", float(x), "volume", float(volume)) # surface area From 7d0b5de14afbeb500e75401570ec7c5299969835 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 20 Feb 2026 14:41:58 -0500 Subject: [PATCH 13/31] mean generation fix -- nans should not be part of the mean --- ifcb_features/biovolume.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 3b4812f..1d9e444 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -72,10 +72,10 @@ def distmap_volume_surface_area(B,perimeter_image=None): D = D.astype(np.float32) D[~fill] = np.nan # representative transect (match MATLAB float32 sum/mean in column-major order) - flat = D.ravel(order="F") - nan_mask = np.isnan(flat) + flat_raw = D.ravel(order="F") + nan_mask = np.isnan(flat_raw) count = np.int64(np.sum(~nan_mask)) - flat = np.where(nan_mask, np.float32(0.0), flat.astype(np.float32)) + flat = np.where(nan_mask, np.float32(0.0), flat_raw.astype(np.float32)) use_deterministic_sum = True if count == 0: sum_val = np.float32(0.0) @@ -85,8 +85,8 @@ def distmap_volume_surface_area(B,perimeter_image=None): # Deterministic column-major sum in float64. sum_acc = 0.0 cnt = 0 - for v in flat: - if not np.isnan(v): + for v, is_nan in zip(flat_raw, nan_mask): + if not is_nan: sum_acc += float(v) cnt += 1 sum_val = np.float64(sum_acc) From 4b8fec1c731709997414c01ae2fc05d72415f438 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 20 Feb 2026 15:30:31 -0500 Subject: [PATCH 14/31] add deterministic summing to heidi distmap function --- ifcb_features/biovolume.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 1d9e444..f33a8ab 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -145,12 +145,22 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) D = D.astype(np.float64) D[~fill] = np.nan + # deterministic sum/mean over column-major order to match MATLAB loop + flat = D.ravel(order="F") + sum_acc = 0.0 + cnt = 0 + for v in flat: + if not np.isnan(v): + sum_acc += float(v) + cnt += 1 + sum_val = np.float64(sum_acc) + mean_val = np.float64(sum_acc / float(cnt)) if cnt else np.float64(np.nan) # representative transect length - x = 4 * np.nanmean(D) - 2 + x = 4 * mean_val - 2 # correction factors c1 = (x**2) / (x**2 + 2 * x + 0.5) c2 = np.pi / 2 - volume = c1 * c2 * 2 * np.nansum(D) + volume = c1 * c2 * 2 * sum_val # surface area D_sa = np.nan_to_num(D, nan=0.0) h, w = D_sa.shape From 67f74f73ca2a468327ac96245d2818ce3ec17fa6 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 20 Feb 2026 15:31:49 -0500 Subject: [PATCH 15/31] make heidi distmap version the default --- ifcb_features/biovolume.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index f33a8ab..916de32 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -49,7 +49,7 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): # Match MATLAB SOR center logic (old top-edge + radius) when True. SOR_USE_OLD_CENTER = True # Use Heidi_explore distmap implementation (bwdist + heidi surface area) when True. -USE_HEIDI_DISTMAP = False +USE_HEIDI_DISTMAP = True def distmap_volume_surface_area(B,perimeter_image=None): """Moberg & Sosik biovolume algorithm From cae3d661374c82fac13c54c0c9bd272eb767894c Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 20 Feb 2026 16:34:24 -0500 Subject: [PATCH 16/31] switch sum accumulator to explicitly use float32 --- ifcb_features/biovolume.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 916de32..969aa06 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -82,21 +82,21 @@ def distmap_volume_surface_area(B,perimeter_image=None): mean_val = np.float32(np.nan) else: if use_deterministic_sum: - # Deterministic column-major sum in float64. - sum_acc = 0.0 + # Deterministic column-major sum in float32 (match MATLAB single). + sum_acc = np.float32(0.0) cnt = 0 for v, is_nan in zip(flat_raw, nan_mask): if not is_nan: - sum_acc += float(v) + sum_acc = np.float32(sum_acc + np.float32(v)) cnt += 1 - sum_val = np.float64(sum_acc) - mean_val = np.float64(sum_acc / float(cnt)) if cnt else np.float64(np.nan) + sum_val = np.float32(sum_acc) + mean_val = np.float32(sum_acc / np.float32(cnt)) if cnt else np.float32(np.nan) else: # Match MATLAB sum for single arrays: column-major, float32 accumulation. sum_val = np.float32(np.sum(flat, dtype=np.float32)) mean_val = np.float32(sum_val / np.float32(count)) if use_deterministic_sum: - x = np.float64(4.0) * mean_val - np.float64(2.0) + x = np.float32(4.0) * mean_val - np.float32(2.0) else: x = np.float32(4.0) * mean_val - np.float32(2.0) # diamond correction @@ -105,7 +105,8 @@ def distmap_volume_surface_area(B,perimeter_image=None): # c2 = np.pi / 2 # volume = c1 * c2 * 2 * np.sum(D) if use_deterministic_sum: - volume = np.float64(c1 * np.float64(np.pi) * sum_val) + c1 = np.float32(c1) + volume = np.float32(c1 * np.float32(np.pi) * sum_val) else: # compute volume in float32 to match MATLAB single-precision path c1 = np.float32(c1) @@ -147,20 +148,20 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): D[~fill] = np.nan # deterministic sum/mean over column-major order to match MATLAB loop flat = D.ravel(order="F") - sum_acc = 0.0 + sum_acc = np.float32(0.0) cnt = 0 for v in flat: if not np.isnan(v): - sum_acc += float(v) + sum_acc = np.float32(sum_acc + np.float32(v)) cnt += 1 - sum_val = np.float64(sum_acc) - mean_val = np.float64(sum_acc / float(cnt)) if cnt else np.float64(np.nan) + sum_val = np.float32(sum_acc) + mean_val = np.float32(sum_acc / np.float32(cnt)) if cnt else np.float32(np.nan) # representative transect length - x = 4 * mean_val - 2 + x = np.float32(4.0) * mean_val - np.float32(2.0) # correction factors c1 = (x**2) / (x**2 + 2 * x + 0.5) c2 = np.pi / 2 - volume = c1 * c2 * 2 * sum_val + volume = np.float32(c1 * np.float32(c2) * np.float32(2.0) * sum_val) # surface area D_sa = np.nan_to_num(D, nan=0.0) h, w = D_sa.shape From d0d4cb16202f9b7f88dd8eee11332d3efd26bdb0 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Mon, 23 Feb 2026 22:09:31 -0500 Subject: [PATCH 17/31] add explicit surface area summing --- ifcb_features/biovolume.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 969aa06..5f326c2 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -45,6 +45,14 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): return area_bot, area_top +def _det_sum32(arr): + """Deterministic float32 sum in column-major order.""" + sum_acc = np.float32(0.0) + flat = arr.ravel(order="F") + for v in flat: + sum_acc = np.float32(sum_acc + np.float32(v)) + return np.float32(sum_acc) + USE_EDT_INDICES = True # recompute distances from indices to better match MATLAB bwdist # Match MATLAB SOR center logic (old top-edge + radius) when True. SOR_USE_OLD_CENTER = True @@ -128,10 +136,9 @@ def distmap_volume_surface_area(B,perimeter_image=None): np.float32(2.0) * np.float32(np.sqrt(2.0)) * x / np.float32(2.0) + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) ) - sa = np.float32(2.0) * c * np.float32( - np.nansum(area_bot.astype(np.float32), dtype=np.float32) - + np.nansum(area_top.astype(np.float32), dtype=np.float32) - ) + sum_bot = _det_sum32(area_bot.astype(np.float32)) + sum_top = _det_sum32(area_top.astype(np.float32)) + sa = np.float32(2.0) * c * np.float32(sum_bot + sum_top) # return volume, representative transect, and surface area return volume, x, sa @@ -168,7 +175,9 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): Y, X = np.mgrid[1:h + 1, 1:w + 1] area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) c = (np.pi * x / 2.0) / (2.0 * np.sqrt(2.0) * x / 2.0 + (1.0 + np.sqrt(2.0)) / 2.0) - sa = 2 * c * (np.sum(area_bot) + np.sum(area_top)) + sum_bot = _det_sum32(area_bot.astype(np.float32)) + sum_top = _det_sum32(area_top.astype(np.float32)) + sa = np.float32(2.0) * np.float32(c) * np.float32(sum_bot + sum_top) return volume, x, sa def sor_volume_surface_area(B): From 3dc38fe8a99c90e79d6662d3aec74f2230cd7233 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 25 Feb 2026 15:32:14 -0500 Subject: [PATCH 18/31] add largest 8-connected component fix --- ifcb_features/all.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 1a77fd5..9d60907 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -210,6 +210,15 @@ def distmap_volume_surface_area(self): """volume of blob computed via Moberg & Sosik algorithm""" # MATLAB uses a tight cropped blob image for distmap via regionprops.Image. img = self.regionprops.image.astype(bool) + # Match MATLAB blob_geomprop: use largest 8-connected component. + if img.any(): + from scipy.ndimage import label + labeled, num = label(img, structure=np.ones((3, 3), dtype=np.int32)) + if num > 1: + counts = np.bincount(labeled.ravel()) + counts[0] = 0 + largest = counts.argmax() + img = labeled == largest perim = find_perimeter(img) return distmap_volume_surface_area(img, perim) @property From 2579531b5b71a4b8019c248b624b076a632d0196 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 27 Feb 2026 11:34:14 -0500 Subject: [PATCH 19/31] round float64 values to float32 --- ifcb_features/biovolume.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 5f326c2..e9b27f2 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -170,9 +170,11 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): c2 = np.pi / 2 volume = np.float32(c1 * np.float32(c2) * np.float32(2.0) * sum_val) # surface area - D_sa = np.nan_to_num(D, nan=0.0) + D_sa = np.nan_to_num(D, nan=0.0).astype(np.float32, copy=False) h, w = D_sa.shape Y, X = np.mgrid[1:h + 1, 1:w + 1] + X = X.astype(np.float32, copy=False) + Y = Y.astype(np.float32, copy=False) area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) c = (np.pi * x / 2.0) / (2.0 * np.sqrt(2.0) * x / 2.0 + (1.0 + np.sqrt(2.0)) / 2.0) sum_bot = _det_sum32(area_bot.astype(np.float32)) From e9207d4a7a6a928e88cc467bc1df911337c54b9a Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 27 Feb 2026 13:45:31 -0500 Subject: [PATCH 20/31] add more explicit float32 casting --- ifcb_features/biovolume.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index e9b27f2..4c19dc8 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -132,8 +132,9 @@ def distmap_volume_surface_area(B,perimeter_image=None): area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) # final correction of the diamond cross-section # inherent in the distance map to be circular instead - c = (np.float32(np.pi) * x / np.float32(2.0)) / ( - np.float32(2.0) * np.float32(np.sqrt(2.0)) * x / np.float32(2.0) + # compute c fully in float32 to match MATLAB single-precision path + c = (np.float32(np.pi) * np.float32(x) / np.float32(2.0)) / ( + np.float32(2.0) * np.float32(np.sqrt(2.0)) * np.float32(x) / np.float32(2.0) + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) ) sum_bot = _det_sum32(area_bot.astype(np.float32)) @@ -166,9 +167,9 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): # representative transect length x = np.float32(4.0) * mean_val - np.float32(2.0) # correction factors - c1 = (x**2) / (x**2 + 2 * x + 0.5) - c2 = np.pi / 2 - volume = np.float32(c1 * np.float32(c2) * np.float32(2.0) * sum_val) + c1 = (x**2) / (x**2 + np.float32(2.0) * x + np.float32(0.5)) + c2 = np.float32(np.pi / 2.0) + volume = np.float32(c1 * c2 * np.float32(2.0) * sum_val) # surface area D_sa = np.nan_to_num(D, nan=0.0).astype(np.float32, copy=False) h, w = D_sa.shape @@ -176,7 +177,11 @@ def distmap_volume_surface_area_heidi(B, perimeter_image=None): X = X.astype(np.float32, copy=False) Y = Y.astype(np.float32, copy=False) area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) - c = (np.pi * x / 2.0) / (2.0 * np.sqrt(2.0) * x / 2.0 + (1.0 + np.sqrt(2.0)) / 2.0) + # compute c fully in float32 to match MATLAB single-precision path + c = (np.float32(np.pi) * np.float32(x) / np.float32(2.0)) / ( + np.float32(2.0) * np.float32(np.sqrt(2.0)) * np.float32(x) / np.float32(2.0) + + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) + ) sum_bot = _det_sum32(area_bot.astype(np.float32)) sum_top = _det_sum32(area_top.astype(np.float32)) sa = np.float32(2.0) * np.float32(c) * np.float32(sum_bot + sum_top) From 04da5af78022bd0803f172cb9a97310bad522cb3 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:28:07 -0500 Subject: [PATCH 21/31] add zero to NaN conversion and remove unused roi param --- ifcb_features/all.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 9d60907..ca87954 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -22,12 +22,11 @@ from .ringwedge import ring_wedge, _N_RINGS, _N_WEDGES class BlobFeatures(object): - def __init__(self,blob_image,roi_image, roi=None): + def __init__(self,blob_image,roi_image): """roi_image should be the same size as the blob image, so a sub-roi""" self.image = np.array(blob_image).astype(np.bool) self.roi_image = roi_image - self.roi = roi @property def shape(self): """h,w of blob image""" @@ -342,7 +341,7 @@ def blobs(self): the segmented mask, ordered by largest area to smallest area""" labeled, bboxes, blobs = find_blobs(self.blobs_image) cropped_rois = [self.image[bbox] for bbox in bboxes] - Bs = [BlobFeatures(b, R, roi=self) for b, R in zip(blobs, cropped_rois)] + Bs = [BlobFeatures(b, R) for b, R in zip(blobs, cropped_rois)] # sort by area, largest first return sorted(Bs, key=lambda B: B.area, reverse=True) @property @@ -548,9 +547,11 @@ def compute_features(roi_image, blobs_image=None, raw_stitch=None): ('summedPerimeter', r.summed_perimeter), ('summedSurfaceArea', r.summed_surface_area), ] + def _zero_to_nan(v): + return float('nan') if v == 0 else v f += [ - ('Area_over_PerimeterSquared', b.area_over_perimeter_squared), - ('Area_over_Perimeter', b.area_over_perimeter), - ('summedConvexPerimeter_over_Perimeter', r.summed_convex_perimeter_over_perimeter), + ('Area_over_PerimeterSquared', _zero_to_nan(b.area_over_perimeter_squared)), + ('Area_over_Perimeter', _zero_to_nan(b.area_over_perimeter)), + ('summedConvexPerimeter_over_Perimeter', _zero_to_nan(r.summed_convex_perimeter_over_perimeter)), ] return (r.blobs_image, f) From d3cc825fb9123e249054ff5acd5cccb8d615c54b Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:30:00 -0500 Subject: [PATCH 22/31] remove debug function --- ifcb_features/all.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index ca87954..6eacb47 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -8,7 +8,7 @@ from functools import lru_cache from .segmentation import segment_roi -from .blobs import find_blobs, label_blobs, rotate_blob, blob_shape +from .blobs import find_blobs, rotate_blob, blob_shape from .blob_geometry import (equiv_diameter, ellipse_properties, invmoments, convex_hull, convex_hull_image, convex_hull_properties, feret_diameter, @@ -345,19 +345,6 @@ def blobs(self): # sort by area, largest first return sorted(Bs, key=lambda B: B.area, reverse=True) @property - @lru_cache() - def largest_blob_mask_full(self): - """full-size mask for the largest blob (MATLAB uses full-size mask for biovolume)""" - labeled, objects = label_blobs(self.blobs_image) - if not objects: - return None - # labels are 1-based in labeled image - areas = [] - for ix in range(1, len(objects) + 1): - areas.append(int(np.sum(labeled == ix))) - idx = int(np.argmax(areas)) + 1 - return labeled == idx - @property def num_blobs(self): return len(self.blobs) @property From 34aabe47543656c139e2fb06b5300897a73bb967 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:33:37 -0500 Subject: [PATCH 23/31] remove debug code --- ifcb_features/biovolume.py | 94 ++------------------------------------ 1 file changed, 3 insertions(+), 91 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 4c19dc8..985403c 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -1,4 +1,3 @@ -import os import numpy as np from scipy.ndimage import binary_fill_holes, distance_transform_edt @@ -53,99 +52,12 @@ def _det_sum32(arr): sum_acc = np.float32(sum_acc + np.float32(v)) return np.float32(sum_acc) -USE_EDT_INDICES = True # recompute distances from indices to better match MATLAB bwdist # Match MATLAB SOR center logic (old top-edge + radius) when True. SOR_USE_OLD_CENTER = True -# Use Heidi_explore distmap implementation (bwdist + heidi surface area) when True. -USE_HEIDI_DISTMAP = True -def distmap_volume_surface_area(B,perimeter_image=None): - """Moberg & Sosik biovolume algorithm - returns volume and representative transect""" - if USE_HEIDI_DISTMAP: - return distmap_volume_surface_area_heidi(B, perimeter_image) - if perimeter_image is None: - perimeter_image = find_perimeter(B) - # elementwise distance to perimeter + 1 - if USE_EDT_INDICES: - _, inds = distance_transform_edt(1 - perimeter_image, return_indices=True) - coords = np.indices(perimeter_image.shape, dtype=np.int64) - deltas = coords - inds.astype(np.int64, copy=False) - dist2 = np.sum(deltas * deltas, axis=0, dtype=np.int64) - D = np.sqrt(dist2.astype(np.float32)) + np.float32(1.0) - else: - D = distance_transform_edt(1 - perimeter_image) + 1 - # mask distances outside filled perimeter (match MATLAB imfill on boundary image) - fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) - D = D.astype(np.float32) - D[~fill] = np.nan - # representative transect (match MATLAB float32 sum/mean in column-major order) - flat_raw = D.ravel(order="F") - nan_mask = np.isnan(flat_raw) - count = np.int64(np.sum(~nan_mask)) - flat = np.where(nan_mask, np.float32(0.0), flat_raw.astype(np.float32)) - use_deterministic_sum = True - if count == 0: - sum_val = np.float32(0.0) - mean_val = np.float32(np.nan) - else: - if use_deterministic_sum: - # Deterministic column-major sum in float32 (match MATLAB single). - sum_acc = np.float32(0.0) - cnt = 0 - for v, is_nan in zip(flat_raw, nan_mask): - if not is_nan: - sum_acc = np.float32(sum_acc + np.float32(v)) - cnt += 1 - sum_val = np.float32(sum_acc) - mean_val = np.float32(sum_acc / np.float32(cnt)) if cnt else np.float32(np.nan) - else: - # Match MATLAB sum for single arrays: column-major, float32 accumulation. - sum_val = np.float32(np.sum(flat, dtype=np.float32)) - mean_val = np.float32(sum_val / np.float32(count)) - if use_deterministic_sum: - x = np.float32(4.0) * mean_val - np.float32(2.0) - else: - x = np.float32(4.0) * mean_val - np.float32(2.0) - # diamond correction - c1 = x**2 / (x**2 + 2*x + 0.5) - # circle correction - # c2 = np.pi / 2 - # volume = c1 * c2 * 2 * np.sum(D) - if use_deterministic_sum: - c1 = np.float32(c1) - volume = np.float32(c1 * np.float32(np.pi) * sum_val) - else: - # compute volume in float32 to match MATLAB single-precision path - c1 = np.float32(c1) - volume = np.float32(c1 * np.float32(np.pi) * sum_val) - if os.getenv("DISTMAP_DEBUG") == "1": - print("distmap_debug sum_val", float(sum_val), "mean_val", float(mean_val), "x", float(x), "volume", float(volume)) - # surface area - # surface area uses NaN-masked distances as zero - D_sa = np.nan_to_num(D, nan=0.0) - h, w = D_sa.shape - # MATLAB uses 1-based X/Y indices for surface area geometry. - Y, X = np.mgrid[1:h + 1, 1:w + 1] - X = X.astype(np.float32, copy=False) - Y = Y.astype(np.float32, copy=False) - area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) - # final correction of the diamond cross-section - # inherent in the distance map to be circular instead - # compute c fully in float32 to match MATLAB single-precision path - c = (np.float32(np.pi) * np.float32(x) / np.float32(2.0)) / ( - np.float32(2.0) * np.float32(np.sqrt(2.0)) * np.float32(x) / np.float32(2.0) - + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) - ) - sum_bot = _det_sum32(area_bot.astype(np.float32)) - sum_top = _det_sum32(area_top.astype(np.float32)) - sa = np.float32(2.0) * c * np.float32(sum_bot + sum_top) - # return volume, representative transect, and surface area - return volume, x, sa - - -def distmap_volume_surface_area_heidi(B, perimeter_image=None): - """Heidi_explore distmap_volume (bwdist + surface area) implementation.""" +def distmap_volume_surface_area(B, perimeter_image=None): + """Moberg & Sosik biovolume algorithm (Heidi_explore implementation). + Returns volume, representative transect, and surface area.""" if perimeter_image is None: perimeter_image = find_perimeter(B) # calculate distance map (MATLAB bwdist) From d99b22f6581dd334dab821e5b32f7c646ad11b17 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:37:45 -0500 Subject: [PATCH 24/31] remove kmeans debug code --- ifcb_features/segmentation.py | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index 1b23f82..ff9d331 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -1,4 +1,3 @@ -import os import numpy as np from skimage import img_as_float32 @@ -6,7 +5,6 @@ from scipy.ndimage.morphology import binary_fill_holes from skimage.morphology import binary_closing, binary_dilation, binary_erosion, remove_small_objects, diamond import skimage.filters as imfilters -from sklearn.cluster import KMeans from skimage.util import img_as_ubyte @@ -20,13 +18,6 @@ HT_T1, HT_T2 = 0.3, 0.09 BLOB_MIN = 40 DARK_THRESHOLD_ADJUSTMENT = 0.75 -USE_STRICT_KMEANS = True -KMEANS_FALLBACK_SEED = os.getenv("IFCB_KMEANS_FALLBACK_SEED") -if KMEANS_FALLBACK_SEED is not None: - try: - KMEANS_FALLBACK_SEED = int(KMEANS_FALLBACK_SEED) - except ValueError: - KMEANS_FALLBACK_SEED = None def _kmeans_1d_strict(values, max_iter=100): @@ -127,29 +118,9 @@ def kmeans_segment(roi): r = roi.astype(np.float32) # Use column-major order to match MATLAB img(:) traversal. values = r.reshape(-1, order="F") - if USE_STRICT_KMEANS: - C, J = _kmeans_1d_strict(values, max_iter=100) - if C is None: - C = None - else: - C = None - - if C is None: - # fallback to sklearn k-means - km = KMeans( - n_clusters=2, - n_init=1, - init="k-means++", - max_iter=100, - tol=0, - random_state=KMEANS_FALLBACK_SEED, - ) - km.fit(values.reshape(-1, 1)) - J = km.labels_ - C = km.cluster_centers_.reshape(-1) - else: - C = C.reshape(-1) - J = J.reshape(-1) + C, J = _kmeans_1d_strict(values, max_iter=100) + C = C.reshape(-1) + J = J.reshape(-1) # reshape labels to image using MATLAB order J = J.reshape(r.shape, order="F") From 904305fbba7b5f81004d87812ef495411136fead Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:42:00 -0500 Subject: [PATCH 25/31] remove sor debug code --- ifcb_features/biovolume.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 985403c..921efdb 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -52,8 +52,6 @@ def _det_sum32(arr): sum_acc = np.float32(sum_acc + np.float32(v)) return np.float32(sum_acc) -# Match MATLAB SOR center logic (old top-edge + radius) when True. -SOR_USE_OLD_CENTER = True def distmap_volume_surface_area(B, perimeter_image=None): """Moberg & Sosik biovolume algorithm (Heidi_explore implementation). @@ -107,17 +105,9 @@ def sor_volume_surface_area(B): r = np.sum(B, axis=0).astype(np.float64) ri = r > 0 r = (r / 2.0)[ri] - if SOR_USE_OLD_CENTER: - y1 = np.argmax(B, axis=0).astype(np.float64) + 1.0 - y1 = y1[ri] - center = y1 + r - else: - rowind = np.arange(1, h + 1, dtype=np.float64)[:, None] - temp = rowind * B - temp = temp.astype(np.float64, copy=False) - temp[temp == 0] = np.nan - center = np.nanmedian(temp, axis=0) + 0.5 - center = center[ri] + y1 = np.argmax(B, axis=0).astype(np.float64) + 1.0 + y1 = y1[ri] + center = y1 + r n_slices = r.size # compute angles between 0 and 180 degrees inclusive, in radians da = 0.25 From 95f99589099383285dd51ed5952c3024cf6a97aa Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:43:07 -0500 Subject: [PATCH 26/31] remove unused imports --- ifcb_features/segmentation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index ff9d331..70263a2 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -1,13 +1,10 @@ import numpy as np -from skimage import img_as_float32 from scipy.ndimage import label from scipy.ndimage.morphology import binary_fill_holes -from skimage.morphology import binary_closing, binary_dilation, binary_erosion, remove_small_objects, diamond +from skimage.morphology import binary_closing, binary_dilation, binary_erosion, diamond import skimage.filters as imfilters -from skimage.util import img_as_ubyte - from .phasecong import phasecong_Mm from .morphology import SE3, hysthresh, bwmorph_thin, EIGHT From 76255b64b702b9e16ba2f2467eb2763f6a946bf1 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:52:01 -0500 Subject: [PATCH 27/31] remove unused code from debugging --- ifcb_features/biovolume.py | 1 - ifcb_features/blobs.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 921efdb..8791c78 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -101,7 +101,6 @@ def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" # compute center using median (current) or bottom+radius (legacy MATLAB) - h, w = B.shape r = np.sum(B, axis=0).astype(np.float64) ri = r > 0 r = (r / 2.0)[ri] diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index ae51f75..d044c0f 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -2,8 +2,6 @@ from scipy.ndimage import measurements -from scipy.ndimage import rotate as nd_rotate - from .morphology import SE2, SE3, EIGHT, bwmorph_thin def label_blobs(B): From f6db04237acac8523529fa8aa0464a9d767af976 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 5 Mar 2026 12:56:17 -0500 Subject: [PATCH 28/31] fix comment --- ifcb_features/biovolume.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 8791c78..0fd2967 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -100,7 +100,7 @@ def distmap_volume_surface_area(B, perimeter_image=None): def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" - # compute center using median (current) or bottom+radius (legacy MATLAB) + # compute center as top-edge + radius to match MATLAB r = np.sum(B, axis=0).astype(np.float64) ri = r > 0 r = (r / 2.0)[ri] From 823a02004d6e1230c061758ce996d5d4827b089d Mon Sep 17 00:00:00 2001 From: John Walsh Date: Thu, 7 May 2026 15:38:29 -0400 Subject: [PATCH 29/31] add explicit float rounding in kmeans --- ifcb_features/segmentation.py | 53 ++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index 1b23f82..2bd125d 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -36,11 +36,35 @@ def _kmeans_1d_strict(values, max_iter=100): if n == 0: return np.array([0.0, 1.0], dtype=np.float32), np.zeros(0, dtype=np.int8) + def center_for_cluster(idx, cluster): + total = np.float32(0.0) + count = np.float32(0.0) + for i in range(n): + if idx[i] == cluster: + total = np.float32(total + values[i]) + count = np.float32(count + np.float32(1.0)) + if count == 0: + return np.float32(np.nan), 0 + return np.float32(total / count), int(count) + + def distances_to_center(center): + distances = np.empty(n, dtype=np.float32) + for i in range(n): + delta = np.float32(values[i] - center) + distances[i] = np.float32(delta * delta) + return distances + + def assigned_total(distances, idx): + total = np.float32(0.0) + for i in range(n): + total = np.float32(total + distances[i, idx[i]]) + return total + centers = np.array([0.0, 1.0], dtype=np.float32) # Initial distances/assignments from provided centers D = np.empty((n, 2), dtype=np.float32) - D[:, 0] = (values - centers[0]) * (values - centers[0]) - D[:, 1] = (values - centers[1]) * (values - centers[1]) + D[:, 0] = distances_to_center(centers[0]) + D[:, 1] = distances_to_center(centers[1]) idx = np.argmin(D, axis=1).astype(np.int8) changed = np.array([0, 1], dtype=np.int64) @@ -50,16 +74,13 @@ def _kmeans_1d_strict(values, max_iter=100): for _iter in range(max_iter): # Recompute centers and counts for changed clusters counts = np.bincount(idx, minlength=2).astype(np.int64) - sums = np.zeros(2, dtype=np.float64) - for i in range(n): - sums[idx[i]] += float(values[i]) for c in changed: if counts[c] > 0: - centers[c] = np.float32(sums[c] / counts[c]) + centers[c], counts[c] = center_for_cluster(idx, c) # Update distances for changed clusters for c in changed: - D[:, c] = (values - centers[c]) * (values - centers[c]) + D[:, c] = distances_to_center(centers[c]) # Handle empty clusters (singleton) empties = [c for c in changed if counts[c] == 0] @@ -76,30 +97,22 @@ def _kmeans_1d_strict(values, max_iter=100): idx[lonely] = empty counts[empty] = 1 counts[from_cluster] -= 1 - D[:, empty] = (values - centers[empty]) * (values - centers[empty]) + D[:, empty] = distances_to_center(centers[empty]) # Update donor cluster center/distance - sums[from_cluster] = np.float64(0.0) - for i in range(n): - if idx[i] == from_cluster: - sums[from_cluster] += float(values[i]) if counts[from_cluster] > 0: - centers[from_cluster] = np.float32(sums[from_cluster] / counts[from_cluster]) - D[:, from_cluster] = (values - centers[from_cluster]) * (values - centers[from_cluster]) + centers[from_cluster], counts[from_cluster] = center_for_cluster(idx, from_cluster) + D[:, from_cluster] = distances_to_center(centers[from_cluster]) changed = np.unique(np.concatenate([changed, np.array([from_cluster], dtype=np.int64)])) # Compute objective and check for improvement - d_assigned = D[np.arange(n), idx] - totsumD = np.sum(d_assigned, dtype=np.float32) + totsumD = assigned_total(D, idx) if prevtotsumD <= totsumD: idx = previdx counts = np.bincount(idx, minlength=2).astype(np.int64) - sums = np.zeros(2, dtype=np.float64) - for i in range(n): - sums[idx[i]] += float(values[i]) for c in changed: if counts[c] > 0: - centers[c] = np.float32(sums[c] / counts[c]) + centers[c], counts[c] = center_for_cluster(idx, c) break previdx = idx.copy() From cf3a59b2b74743962592e6e7d316d8e8686a0ad9 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 8 May 2026 16:28:23 -0400 Subject: [PATCH 30/31] switch from for loops to vectorization for speedup --- ifcb_features/segmentation.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index cd94a45..7de79e9 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -23,30 +23,24 @@ def _kmeans_1d_strict(values, max_iter=100): n = values.shape[0] if n == 0: return np.array([0.0, 1.0], dtype=np.float32), np.zeros(0, dtype=np.int8) + row_indices = np.arange(n) def center_for_cluster(idx, cluster): - total = np.float32(0.0) - count = np.float32(0.0) - for i in range(n): - if idx[i] == cluster: - total = np.float32(total + values[i]) - count = np.float32(count + np.float32(1.0)) + members = idx == cluster + count = int(np.count_nonzero(members)) if count == 0: return np.float32(np.nan), 0 - return np.float32(total / count), int(count) + total = np.cumsum(values[members], dtype=np.float32)[-1] + count_float = np.float32(count) + return np.float32(total / count_float), count def distances_to_center(center): - distances = np.empty(n, dtype=np.float32) - for i in range(n): - delta = np.float32(values[i] - center) - distances[i] = np.float32(delta * delta) - return distances + delta = values - np.float32(center) + return np.asarray(delta * delta, dtype=np.float32) def assigned_total(distances, idx): - total = np.float32(0.0) - for i in range(n): - total = np.float32(total + distances[i, idx[i]]) - return total + assigned = distances[row_indices, idx] + return np.cumsum(assigned, dtype=np.float32)[-1] centers = np.array([0.0, 1.0], dtype=np.float32) # Initial distances/assignments from provided centers @@ -73,7 +67,7 @@ def assigned_total(distances, idx): # Handle empty clusters (singleton) empties = [c for c in changed if counts[c] == 0] if empties: - d_assigned = D[np.arange(n), idx] + d_assigned = D[row_indices, idx] for empty in empties: lonely = int(np.argmax(d_assigned)) from_cluster = int(idx[lonely]) @@ -108,7 +102,7 @@ def assigned_total(distances, idx): # Reassign using nearest centroid, tie -> stay nidx = np.argmin(D, axis=1).astype(np.int8) - dmin = D[np.arange(n), nidx] + dmin = D[row_indices, nidx] moved = np.where(nidx != previdx)[0] if moved.size: stay_mask = D[moved, previdx[moved]] > dmin[moved] From ffedc17b191c4193f8476ccafc3bbc4904cd689b Mon Sep 17 00:00:00 2001 From: John Walsh Date: Fri, 15 May 2026 13:28:14 -0400 Subject: [PATCH 31/31] add more explicit orientation code --- ifcb_features/all.py | 11 ++---- ifcb_features/blob_geometry.py | 43 ++++++++++++++++++++++- ifcb_features/blobs.py | 63 +++++++++++++++++++++++++++------- 3 files changed, 95 insertions(+), 22 deletions(-) diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 6eacb47..b3015ab 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -12,7 +12,8 @@ from .blob_geometry import (equiv_diameter, ellipse_properties, invmoments, convex_hull, convex_hull_image, convex_hull_properties, feret_diameter, - binary_symmetry, feret_diameters) + binary_symmetry, feret_diameters, + explicit_orientation) from .morphology import find_perimeter from .biovolume import (distmap_volume_surface_area, sor_volume_surface_area) @@ -151,13 +152,7 @@ def eccentricity(self): @lru_cache() def orientation(self): """return orientation of blob in degrees""" - # skimage.regionprops.orientation is angle between the 0th axis (rows) - # and the major axis. MATLAB uses angle from the x-axis (columns). - rad = self.regionprops.orientation - deg = 90.0 + (180.0 / np.pi) * rad - if deg > 90.0: - deg -= 180.0 - return deg + return explicit_orientation(self.image) @property @lru_cache() def centroid(self): diff --git a/ifcb_features/blob_geometry.py b/ifcb_features/blob_geometry.py index 6467acc..a052310 100644 --- a/ifcb_features/blob_geometry.py +++ b/ifcb_features/blob_geometry.py @@ -43,6 +43,48 @@ def ellipse_properties(B): return maj_axis, min_axis, ecc, orientation + +def explicit_orientation(B): + """Deterministic blob orientation in degrees for MATLAB/Python parity.""" + B = np.array(B).astype(np.bool) + rows, cols = np.indices(B.shape, dtype=np.float64) + x = (cols + 1.0).ravel(order='C') + y = (rows + 1.0).ravel(order='C') + f = B.astype(np.float64).ravel(order='C') + + m00 = np.float64(0.0) + m10 = np.float64(0.0) + m01 = np.float64(0.0) + for xi, yi, fi in zip(x, y, f): + m00 = np.float64(m00 + np.float64(fi)) + m10 = np.float64(m10 + np.float64(xi * fi)) + m01 = np.float64(m01 + np.float64(yi * fi)) + + if m00 == 0: + return 0.0 + + xbar = np.float64(m10 / m00) + ybar = np.float64(m01 / m00) + + mu20 = np.float64(0.0) + mu02 = np.float64(0.0) + mu11 = np.float64(0.0) + for xi, yi, fi in zip(x, y, f): + dx = np.float64(xi - xbar) + dy = np.float64(yi - ybar) + mu20 = np.float64(mu20 + np.float64(dx * dx * fi)) + mu02 = np.float64(mu02 + np.float64(dy * dy * fi)) + mu11 = np.float64(mu11 + np.float64(dx * dy * fi)) + + theta = np.float64(-0.5) * np.float64( + np.degrees(np.arctan2(np.float64(2.0) * mu11, mu20 - mu02)) + ) + while theta > 90.0: + theta = np.float64(theta - 180.0) + while theta <= -90.0: + theta = np.float64(theta + 180.0) + return float(theta) + def invmoments(B): """compute invariant moments. see Digital Image Processing Using MATLAB, ch. 11""" @@ -181,4 +223,3 @@ def ss(D): # flipped across horizontal (major) axis bflip = ss(np.flipud(B)) return b180, b90, bflip - diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index d044c0f..024d5e8 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -71,21 +71,58 @@ def imrotate_nearest_crop(img, angle_deg): """MATLAB-compatible imrotate(..., 'nearest', 'crop') for binary masks.""" img = np.array(img).astype(np.bool) h, w = img.shape - # MATLAB uses 1-based coordinates, center at (w+1)/2, (h+1)/2 - cx = (w + 1) / 2.0 - cy = (h + 1) / 2.0 - yy, xx = np.indices((h, w)) - x = xx + 1.0 - y = yy + 1.0 - x0 = x - cx - y0 = y - cy + # MATLAB blob_rotate calls imrotate(centered, -Orientation, ...). The + # Python caller passes the already sign-adjusted orientation, so use the + # opposite angle for MATLAB's affine2d/imwarp coordinate convention. + ang = np.deg2rad(-angle_deg) + cos_a = np.cos(ang) + sin_a = np.sin(ang) - ang = np.deg2rad(angle_deg) - cos_a = np.cos(-ang) - sin_a = np.sin(-ang) - x_in = cos_a * x0 - sin_a * y0 + cx - y_in = sin_a * x0 + cos_a * y0 + cy + # imrotate's general-rotation crop path is: + # RA = imref2d(size(A)); + # Rout = applyGeometricTransformToSpatialRef(RA, tform); + # Rout.ImageSize = RA.ImageSize; + # Rout limits are shifted to preserve the center; + # imwarp(A, tform, 'nearest', 'OutputView', Rout) + # Default imref2d world limits are [0.5, size + 0.5]. + x_limits = (0.5, w + 0.5) + y_limits = (0.5, h + 0.5) + corners = np.array( + [ + [x_limits[0], y_limits[0]], + [x_limits[0], y_limits[1]], + [x_limits[1], y_limits[0]], + [x_limits[1], y_limits[1]], + ], + dtype=np.float64, + ) + x_out = corners[:, 0] * cos_a + corners[:, 1] * sin_a + y_out = -corners[:, 0] * sin_a + corners[:, 1] * cos_a + x_trans = (float(np.min(x_out)) + float(np.max(x_out))) / 2.0 - ( + x_limits[0] + x_limits[1] + ) / 2.0 + y_trans = (float(np.min(y_out)) + float(np.max(y_out))) / 2.0 - ( + y_limits[0] + y_limits[1] + ) / 2.0 + x_world_min = x_limits[0] + x_trans + y_world_min = y_limits[0] + y_trans + + # MATLAB's imwarp path lands infinitesimally below exact half-pixel + # boundaries for the observed tie cases. Nudge the crop reference down + # by two ULPs so nearest-neighbor rounding follows the same side. + x_world_min = np.nextafter(np.nextafter(x_world_min, -np.inf), -np.inf) + y_world_min = np.nextafter(np.nextafter(y_world_min, -np.inf), -np.inf) + + rows, cols = np.indices((h, w), dtype=np.float64) + x_world = x_world_min + (cols + 1.0 - 0.5) + y_world = y_world_min + (rows + 1.0 - 0.5) + + # transformPointsInverse for affine2d rotation matrix + # [cos -sin 0; sin cos 0; 0 0 1], then worldToIntrinsic(RA). + # With default RA limits and unit pixel extents, world == intrinsic. + x_in = x_world * cos_a - y_world * sin_a + y_in = x_world * sin_a + y_world * cos_a # MATLAB round: ties away from zero (works for negative too). x_idx = (np.sign(x_in) * np.floor(np.abs(x_in) + 0.5)).astype(np.int64)