diff --git a/extract_slim_features.py b/extract_slim_features.py index 93f3f7a..0644e60 100644 --- a/extract_slim_features.py +++ b/extract_slim_features.py @@ -110,7 +110,7 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None): if all_features: df = pd.DataFrame.from_records(all_features, columns=['roi_number'] + FEATURE_COLUMNS) - df.to_csv(features_output_filename, index=False) + df.to_csv(features_output_filename, index=False, float_format="%.10g") if all_blobs: with zipfile.ZipFile(blobs_output_filename, 'w') as zf: @@ -130,4 +130,4 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None): extract_and_save_all_features(args.data_directory, args.output_directory, args.bins) elapsed = time.time() - beginning - print(f'Total extract time: {elapsed:.2f} seconds') \ No newline at end of file + print(f'Total extract time: {elapsed:.2f} seconds') diff --git a/ifcb_features/all.py b/ifcb_features/all.py index 410ad17..b3015ab 100644 --- a/ifcb_features/all.py +++ b/ifcb_features/all.py @@ -1,8 +1,9 @@ import numpy as np from scipy.ndimage.measurements import find_objects +from scipy.spatial import QhullError -from skimage.measure import regionprops +from skimage.measure import label, regionprops from functools import lru_cache @@ -11,11 +12,12 @@ from .blob_geometry import (equiv_diameter, ellipse_properties, invmoments, convex_hull, convex_hull_image, convex_hull_properties, feret_diameter, - binary_symmetry, feret_diameters) + binary_symmetry, feret_diameters, + explicit_orientation) from .morphology import find_perimeter from .biovolume import (distmap_volume_surface_area, sor_volume_surface_area) -from .perimeter import perimeter_stats, hausdorff_symmetry +from .perimeter import perimeter_stats, hausdorff_symmetry, benkrid_perimeter from .texture import statxture, masked_pixels, texture_pixels from .hog import image_hog from .ringwedge import ring_wedge, _N_RINGS, _N_WEDGES @@ -43,8 +45,13 @@ def size(self): @property @lru_cache() def regionprops(self): - """region props of the blob (assumes single connected region)""" - return regionprops(self.image.astype(np.uint8))[0] + """region props of the blob (assumes single connected region).""" + labels = label(self.image.astype(np.uint8), connectivity=2) + props = regionprops(labels) + if not props: + return None + # Use the largest region to match MATLAB's 8-connected component. + return max(props, key=lambda p: p.area) @property @lru_cache() def area(self): @@ -58,7 +65,7 @@ def equiv_diameter(self): @property @lru_cache() def perimeter(self): - return self.regionprops.perimeter + return benkrid_perimeter(self.perimeter_image) @property def area_over_perimeter_squared(self): return self.area / self.perimeter**2 @@ -74,11 +81,20 @@ def extent(self): @lru_cache() def convex_hull(self): """vertices of convex hull of blob""" - return convex_hull(self.perimeter_points) + try: + return convex_hull(self.perimeter_points) + except QhullError: + # Colinear or degenerate perimeter points; fall back to raw points. + return np.vstack(self.perimeter_points).T @property @lru_cache() def convex_hull_properties(self): - return convex_hull_properties(self.convex_hull) + hull = self.convex_hull + if hull is None or hull.shape[0] < 3: + return self.perimeter, self.area + if np.linalg.matrix_rank(hull - hull[0]) < 2: + return self.perimeter, self.area + return convex_hull_properties(hull) @property @lru_cache() def convex_perimeter(self): @@ -136,11 +152,7 @@ def eccentricity(self): @lru_cache() def orientation(self): """return orientation of blob in degrees""" - rad = self.ellipse_properties[3] - deg = (180/np.pi) * rad - 90 - if deg < -90: - deg += 180 - return deg + return explicit_orientation(self.image) @property @lru_cache() def centroid(self): @@ -190,7 +202,19 @@ def perimeter_points(self): @lru_cache() def distmap_volume_surface_area(self): """volume of blob computed via Moberg & Sosik algorithm""" - return distmap_volume_surface_area(self.image, self.perimeter_image) + # MATLAB uses a tight cropped blob image for distmap via regionprops.Image. + img = self.regionprops.image.astype(bool) + # Match MATLAB blob_geomprop: use largest 8-connected component. + if img.any(): + from scipy.ndimage import label + labeled, num = label(img, structure=np.ones((3, 3), dtype=np.int32)) + if num > 1: + counts = np.bincount(labeled.ravel()) + counts[0] = 0 + largest = counts.argmax() + img = labeled == largest + perim = find_perimeter(img) + return distmap_volume_surface_area(img, perim) @property @lru_cache() def sor_volume_surface_area(self): @@ -312,8 +336,7 @@ def blobs(self): the segmented mask, ordered by largest area to smallest area""" labeled, bboxes, blobs = find_blobs(self.blobs_image) cropped_rois = [self.image[bbox] for bbox in bboxes] - # ignore 1-pixel wide/high blobs - Bs = [BlobFeatures(b,R) for b,R in zip(blobs,cropped_rois) if min(R.shape) > 1] + Bs = [BlobFeatures(b, R) for b, R in zip(blobs, cropped_rois)] # sort by area, largest first return sorted(Bs, key=lambda B: B.area, reverse=True) @property @@ -422,7 +445,9 @@ def ring(self): return self.ring_wedge[3] @lru_cache() def summed_attr(self, attr): - return np.sum(getattr(b,attr) for b in self.blobs) + vals = np.array([getattr(b, attr) for b in self.blobs], dtype=np.float64) + # MATLAB stores blob props in double arrays and sums in double. + return float(np.sum(vals, dtype=np.float64)) @property def summed_area(self): return self.summed_attr('area') @@ -504,9 +529,11 @@ def compute_features(roi_image, blobs_image=None, raw_stitch=None): ('summedPerimeter', r.summed_perimeter), ('summedSurfaceArea', r.summed_surface_area), ] + def _zero_to_nan(v): + return float('nan') if v == 0 else v f += [ - ('Area_over_PerimeterSquared', b.area_over_perimeter_squared), - ('Area_over_Perimeter', b.area_over_perimeter), - ('summedConvexPerimeter_over_Perimeter', r.summed_convex_perimeter_over_perimeter), + ('Area_over_PerimeterSquared', _zero_to_nan(b.area_over_perimeter_squared)), + ('Area_over_Perimeter', _zero_to_nan(b.area_over_perimeter)), + ('summedConvexPerimeter_over_Perimeter', _zero_to_nan(r.summed_convex_perimeter_over_perimeter)), ] return (r.blobs_image, f) diff --git a/ifcb_features/biovolume.py b/ifcb_features/biovolume.py index 939f57a..0fd2967 100644 --- a/ifcb_features/biovolume.py +++ b/ifcb_features/biovolume.py @@ -1,6 +1,6 @@ import numpy as np -from scipy.ndimage import distance_transform_edt +from scipy.ndimage import binary_fill_holes, distance_transform_edt from .morphology import find_perimeter @@ -44,60 +44,84 @@ def bottom_top_area(X,Y,Z,ignore_ground=False): return area_bot, area_top -def distmap_volume_surface_area(B,perimeter_image=None): - """Moberg & Sosik biovolume algorithm - returns volume and representative transect""" +def _det_sum32(arr): + """Deterministic float32 sum in column-major order.""" + sum_acc = np.float32(0.0) + flat = arr.ravel(order="F") + for v in flat: + sum_acc = np.float32(sum_acc + np.float32(v)) + return np.float32(sum_acc) + + +def distmap_volume_surface_area(B, perimeter_image=None): + """Moberg & Sosik biovolume algorithm (Heidi_explore implementation). + Returns volume, representative transect, and surface area.""" if perimeter_image is None: perimeter_image = find_perimeter(B) - # elementwise distance to perimeter + 1 - D = distance_transform_edt(1-perimeter_image) + 1 - # mask distances outside blob - D = D * (B>0) - Dm = np.ma.array(D,mask=1-B) - # representative transect - x = 4 * np.ma.mean(Dm) - 2 - # diamond correction - c1 = x**2 / (x**2 + 2*x + 0.5) - # circle correction - # c2 = np.pi / 2 - # volume = c1 * c2 * 2 * np.sum(D) - volume = c1 * np.pi * np.sum(D) + # calculate distance map (MATLAB bwdist) + D = distance_transform_edt(1 - perimeter_image) + 1 + # mask distances outside filled perimeter + fill = binary_fill_holes(np.array(perimeter_image, dtype=bool)) + D = D.astype(np.float64) + D[~fill] = np.nan + # deterministic sum/mean over column-major order to match MATLAB loop + flat = D.ravel(order="F") + sum_acc = np.float32(0.0) + cnt = 0 + for v in flat: + if not np.isnan(v): + sum_acc = np.float32(sum_acc + np.float32(v)) + cnt += 1 + sum_val = np.float32(sum_acc) + mean_val = np.float32(sum_acc / np.float32(cnt)) if cnt else np.float32(np.nan) + # representative transect length + x = np.float32(4.0) * mean_val - np.float32(2.0) + # correction factors + c1 = (x**2) / (x**2 + np.float32(2.0) * x + np.float32(0.5)) + c2 = np.float32(np.pi / 2.0) + volume = np.float32(c1 * c2 * np.float32(2.0) * sum_val) # surface area - h, w = D.shape - Y, X = np.mgrid[0:h,0:w] - area_bot, area_top = bottom_top_area(X,Y,D,ignore_ground=True) - # final correction of the diamond cross-section - # inherent in the distance map to be circular instead - c = (np.pi * x / 2) / (2 * np.sqrt(2) * x / 2 + (1 + np.sqrt(2)) / 2) - sa = 2 * c * (np.sum(area_bot) + np.sum(area_top)) - # return volume, representative transect, and surface area + D_sa = np.nan_to_num(D, nan=0.0).astype(np.float32, copy=False) + h, w = D_sa.shape + Y, X = np.mgrid[1:h + 1, 1:w + 1] + X = X.astype(np.float32, copy=False) + Y = Y.astype(np.float32, copy=False) + area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True) + # compute c fully in float32 to match MATLAB single-precision path + c = (np.float32(np.pi) * np.float32(x) / np.float32(2.0)) / ( + np.float32(2.0) * np.float32(np.sqrt(2.0)) * np.float32(x) / np.float32(2.0) + + (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0) + ) + sum_bot = _det_sum32(area_bot.astype(np.float32)) + sum_top = _det_sum32(area_top.astype(np.float32)) + sa = np.float32(2.0) * np.float32(c) * np.float32(sum_bot + sum_top) return volume, x, sa def sor_volume_surface_area(B): """pass in rotated blob""" """Sosik and Kilfoyle surface area / volume algorithm""" - # find the bottom point of the circle for each column - m = np.argmax(B,axis=0) - # compute the radius of each slice - d = np.sum(B,axis=0) - # exclude 0s - r = (d/2.)[d>0] - m = m[d>0] + # compute center as top-edge + radius to match MATLAB + r = np.sum(B, axis=0).astype(np.float64) + ri = r > 0 + r = (r / 2.0)[ri] + y1 = np.argmax(B, axis=0).astype(np.float64) + 1.0 + y1 = y1[ri] + center = y1 + r n_slices = r.size - # compute 721 angles between 0 and 180 degrees inclusive, in radians - n_angles = 721 - angR = np.linspace(0,180,n_angles) * (np.pi / 180) + # compute angles between 0 and 180 degrees inclusive, in radians + da = 0.25 + angvec = np.arange(0, 180 + da / 2, da, dtype=np.float64) + n_angles = angvec.size + angR = angvec * (np.pi / 180) # make everything the same shape: (nslices, nangles) - angR = np.vstack([angR] * m.size) - m = np.vstack([m] * n_angles).T + angR = np.vstack([angR] * n_slices) r = np.vstack([r] * n_angles).T - - # compute the center of each slice - center = m + r + center = np.vstack([center] * n_angles).T # correct for edge effects - center[0,:]=center[1,:] - center[-1,:]=center[-2,:] + if n_slices >= 2: + center[0, :] = center[1, :] + center[-1, :] = center[-2, :] # y coordinates of all angles on all slices Y = center + np.cos(angR) * r diff --git a/ifcb_features/blob_geometry.py b/ifcb_features/blob_geometry.py index 6467acc..a052310 100644 --- a/ifcb_features/blob_geometry.py +++ b/ifcb_features/blob_geometry.py @@ -43,6 +43,48 @@ def ellipse_properties(B): return maj_axis, min_axis, ecc, orientation + +def explicit_orientation(B): + """Deterministic blob orientation in degrees for MATLAB/Python parity.""" + B = np.array(B).astype(np.bool) + rows, cols = np.indices(B.shape, dtype=np.float64) + x = (cols + 1.0).ravel(order='C') + y = (rows + 1.0).ravel(order='C') + f = B.astype(np.float64).ravel(order='C') + + m00 = np.float64(0.0) + m10 = np.float64(0.0) + m01 = np.float64(0.0) + for xi, yi, fi in zip(x, y, f): + m00 = np.float64(m00 + np.float64(fi)) + m10 = np.float64(m10 + np.float64(xi * fi)) + m01 = np.float64(m01 + np.float64(yi * fi)) + + if m00 == 0: + return 0.0 + + xbar = np.float64(m10 / m00) + ybar = np.float64(m01 / m00) + + mu20 = np.float64(0.0) + mu02 = np.float64(0.0) + mu11 = np.float64(0.0) + for xi, yi, fi in zip(x, y, f): + dx = np.float64(xi - xbar) + dy = np.float64(yi - ybar) + mu20 = np.float64(mu20 + np.float64(dx * dx * fi)) + mu02 = np.float64(mu02 + np.float64(dy * dy * fi)) + mu11 = np.float64(mu11 + np.float64(dx * dy * fi)) + + theta = np.float64(-0.5) * np.float64( + np.degrees(np.arctan2(np.float64(2.0) * mu11, mu20 - mu02)) + ) + while theta > 90.0: + theta = np.float64(theta - 180.0) + while theta <= -90.0: + theta = np.float64(theta + 180.0) + return float(theta) + def invmoments(B): """compute invariant moments. see Digital Image Processing Using MATLAB, ch. 11""" @@ -181,4 +223,3 @@ def ss(D): # flipped across horizontal (major) axis bflip = ss(np.flipud(B)) return b180, b90, bflip - diff --git a/ifcb_features/blobs.py b/ifcb_features/blobs.py index 769ca48..024d5e8 100644 --- a/ifcb_features/blobs.py +++ b/ifcb_features/blobs.py @@ -2,8 +2,6 @@ from scipy.ndimage import measurements -from skimage.transform import rotate - from .morphology import SE2, SE3, EIGHT, bwmorph_thin def label_blobs(B): @@ -19,32 +17,121 @@ def find_blobs(B): to those bounding boxes""" B = np.array(B).astype(np.bool) labeled, objects = label_blobs(B) - blobs = [labeled[obj]==ix+1 for ix, obj in zip(range(len(objects)), objects)] - return labeled, objects, blobs + # Match MATLAB blob_geomprop: sort connected components by area (descending). + labeled_objects = [(ix + 1, obj) for ix, obj in enumerate(objects)] + def sort_key(item): + label_id, obj = item + comp = labeled[obj] == label_id + area = int(np.sum(comp)) + return (-area, obj[1].start, obj[0].start) + labeled_objects.sort(key=sort_key) + objects_sorted = [obj for _, obj in labeled_objects] + blobs = [labeled[obj] == label for label, obj in labeled_objects] + return labeled, objects_sorted, blobs def center_blob(B): - """returns a new image centered on the blob's - centroid""" - # compute centroid - yc, xc = np.mean(np.vstack(np.where(B)),axis=1) - # center + """Center blob on centroid, matching MATLAB center_blob.m.""" + B = np.array(B).astype(np.bool) + # MATLAB uses regionprops centroid (x, y) with 1-based coordinates. + ys, xs = np.where(B) + if ys.size == 0: + return B.copy() h, w = B.shape - s = max(yc,h-yc,xc,w-xc) - m = int(np.ceil(s*2)) - C = np.zeros((m,m),dtype=np.bool) - y0, x0 = int(np.floor(s-yc)), int(np.floor(s-xc)) - C[y0:y0+h,x0:x0+w]=B + # Match MATLAB center_blob.m: + # centroid is 1-based, then shift to 0-based for padding math. + xc = (np.mean(xs) + 1.0) - 1.0 + yc = (np.mean(ys) + 1.0) - 1.0 + s = max(yc, h - yc, xc, w - xc) + m = int(np.ceil(s * 2)) + C = np.zeros((m, m), dtype=np.bool) + # Integer-exact offsets to avoid float drift. + n = ys.size + sum_y = int(np.sum(ys)) + sum_x = int(np.sum(xs)) + hN = h * n + wN = w * n + ycN = sum_y + xcN = sum_x + sN = max(ycN, hN - ycN, xcN, wN - xcN) + m = int((2 * sN + n - 1) // n) + y0 = int((sN - ycN) // n) + x0 = int((sN - xcN) // n) + C[y0:y0 + h, x0:x0 + w] = B return C def rotate_blob(blob, theta): """rotate a blob counterclockwise""" blob = center_blob(blob) - # note that v2 uses bilinear interpolation in MATLAB - # and that is not available in skimage rotate - # so v3 uses nearest-neighbor - blob = rotate(blob,-1*theta,order=0).astype(np.bool) + # Match MATLAB imrotate(centered, theta, 'nearest', 'crop') + blob = imrotate_nearest_crop(blob, theta) # note that v2 does morphological post-processing and v3 does not return blob + +def imrotate_nearest_crop(img, angle_deg): + """MATLAB-compatible imrotate(..., 'nearest', 'crop') for binary masks.""" + img = np.array(img).astype(np.bool) + h, w = img.shape + + # MATLAB blob_rotate calls imrotate(centered, -Orientation, ...). The + # Python caller passes the already sign-adjusted orientation, so use the + # opposite angle for MATLAB's affine2d/imwarp coordinate convention. + ang = np.deg2rad(-angle_deg) + cos_a = np.cos(ang) + sin_a = np.sin(ang) + + # imrotate's general-rotation crop path is: + # RA = imref2d(size(A)); + # Rout = applyGeometricTransformToSpatialRef(RA, tform); + # Rout.ImageSize = RA.ImageSize; + # Rout limits are shifted to preserve the center; + # imwarp(A, tform, 'nearest', 'OutputView', Rout) + # Default imref2d world limits are [0.5, size + 0.5]. + x_limits = (0.5, w + 0.5) + y_limits = (0.5, h + 0.5) + corners = np.array( + [ + [x_limits[0], y_limits[0]], + [x_limits[0], y_limits[1]], + [x_limits[1], y_limits[0]], + [x_limits[1], y_limits[1]], + ], + dtype=np.float64, + ) + x_out = corners[:, 0] * cos_a + corners[:, 1] * sin_a + y_out = -corners[:, 0] * sin_a + corners[:, 1] * cos_a + x_trans = (float(np.min(x_out)) + float(np.max(x_out))) / 2.0 - ( + x_limits[0] + x_limits[1] + ) / 2.0 + y_trans = (float(np.min(y_out)) + float(np.max(y_out))) / 2.0 - ( + y_limits[0] + y_limits[1] + ) / 2.0 + x_world_min = x_limits[0] + x_trans + y_world_min = y_limits[0] + y_trans + + # MATLAB's imwarp path lands infinitesimally below exact half-pixel + # boundaries for the observed tie cases. Nudge the crop reference down + # by two ULPs so nearest-neighbor rounding follows the same side. + x_world_min = np.nextafter(np.nextafter(x_world_min, -np.inf), -np.inf) + y_world_min = np.nextafter(np.nextafter(y_world_min, -np.inf), -np.inf) + + rows, cols = np.indices((h, w), dtype=np.float64) + x_world = x_world_min + (cols + 1.0 - 0.5) + y_world = y_world_min + (rows + 1.0 - 0.5) + + # transformPointsInverse for affine2d rotation matrix + # [cos -sin 0; sin cos 0; 0 0 1], then worldToIntrinsic(RA). + # With default RA limits and unit pixel extents, world == intrinsic. + x_in = x_world * cos_a - y_world * sin_a + y_in = x_world * sin_a + y_world * cos_a + + # MATLAB round: ties away from zero (works for negative too). + x_idx = (np.sign(x_in) * np.floor(np.abs(x_in) + 0.5)).astype(np.int64) + y_idx = (np.sign(y_in) * np.floor(np.abs(y_in) + 0.5)).astype(np.int64) + + out = np.zeros_like(img, dtype=np.bool) + mask = (x_idx >= 1) & (x_idx <= w) & (y_idx >= 1) & (y_idx <= h) + out[mask] = img[y_idx[mask] - 1, x_idx[mask] - 1] + return out def blob_shape(b0): h, w = b0.shape @@ -60,4 +147,3 @@ def blob_shape(b0): w = int((x1-x0) + 0.5) return h,w - diff --git a/ifcb_features/perimeter.py b/ifcb_features/perimeter.py index 6c92e8c..a3607a7 100644 --- a/ifcb_features/perimeter.py +++ b/ifcb_features/perimeter.py @@ -6,6 +6,8 @@ from skimage.measure import regionprops +from scipy.ndimage import convolve + from .morphology import find_perimeter from .random import simple_prng @@ -18,6 +20,23 @@ def hist_stats(arr): kurtosis = stats.kurtosis(arr,fisher=False) return mean, median, skewness, kurtosis + +def benkrid_perimeter(border_image): + """Perimeter estimator matching MATLAB benkrid_perimeter.""" + weights = np.zeros(50, dtype=np.float64) + weights[[5, 7, 15, 17, 25, 27]] = 1.0 + weights[[21, 33]] = np.sqrt(2) + weights[[13, 23]] = (1 + np.sqrt(2)) / 2 + + kernel = np.array([[10, 2, 10], + [2, 1, 2], + [10, 2, 10]], dtype=np.float64) + + coded = convolve(border_image.astype(np.float64), kernel, mode='constant', cval=0.0) + coded = coded.astype(np.int64) + coded = np.clip(coded, 0, weights.size - 1) + return float(np.sum(weights[coded])) + def subsample_dist(A,max_n=10000): """subsampling version of pdist""" # take a min of n**2 and max of max_n samples diff --git a/ifcb_features/segmentation.py b/ifcb_features/segmentation.py index aef35b2..7de79e9 100644 --- a/ifcb_features/segmentation.py +++ b/ifcb_features/segmentation.py @@ -1,12 +1,9 @@ import numpy as np -from skimage import img_as_float32 +from scipy.ndimage import label from scipy.ndimage.morphology import binary_fill_holes -from skimage.morphology import binary_closing, binary_dilation, binary_erosion, remove_small_objects, diamond +from skimage.morphology import binary_closing, binary_dilation, binary_erosion, diamond import skimage.filters as imfilters -from sklearn.cluster import KMeans - -from skimage.util import img_as_ubyte from .phasecong import phasecong_Mm from .morphology import SE3, hysthresh, bwmorph_thin, EIGHT @@ -19,27 +16,142 @@ BLOB_MIN = 40 DARK_THRESHOLD_ADJUSTMENT = 0.75 + +def _kmeans_1d_strict(values, max_iter=100): + """MATLAB-style 1D k-means (batch) with singleton empty handling.""" + values = np.asarray(values, dtype=np.float32) + n = values.shape[0] + if n == 0: + return np.array([0.0, 1.0], dtype=np.float32), np.zeros(0, dtype=np.int8) + row_indices = np.arange(n) + + def center_for_cluster(idx, cluster): + members = idx == cluster + count = int(np.count_nonzero(members)) + if count == 0: + return np.float32(np.nan), 0 + total = np.cumsum(values[members], dtype=np.float32)[-1] + count_float = np.float32(count) + return np.float32(total / count_float), count + + def distances_to_center(center): + delta = values - np.float32(center) + return np.asarray(delta * delta, dtype=np.float32) + + def assigned_total(distances, idx): + assigned = distances[row_indices, idx] + return np.cumsum(assigned, dtype=np.float32)[-1] + + centers = np.array([0.0, 1.0], dtype=np.float32) + # Initial distances/assignments from provided centers + D = np.empty((n, 2), dtype=np.float32) + D[:, 0] = distances_to_center(centers[0]) + D[:, 1] = distances_to_center(centers[1]) + idx = np.argmin(D, axis=1).astype(np.int8) + + changed = np.array([0, 1], dtype=np.int64) + previdx = np.zeros(n, dtype=np.int8) + prevtotsumD = np.float32(np.inf) + + for _iter in range(max_iter): + # Recompute centers and counts for changed clusters + counts = np.bincount(idx, minlength=2).astype(np.int64) + for c in changed: + if counts[c] > 0: + centers[c], counts[c] = center_for_cluster(idx, c) + + # Update distances for changed clusters + for c in changed: + D[:, c] = distances_to_center(centers[c]) + + # Handle empty clusters (singleton) + empties = [c for c in changed if counts[c] == 0] + if empties: + d_assigned = D[row_indices, idx] + for empty in empties: + lonely = int(np.argmax(d_assigned)) + from_cluster = int(idx[lonely]) + if counts[from_cluster] < 2: + from_cluster = int(np.argmax(counts > 1)) + lonely = int(np.argmax(idx == from_cluster)) + + centers[empty] = values[lonely] + idx[lonely] = empty + counts[empty] = 1 + counts[from_cluster] -= 1 + D[:, empty] = distances_to_center(centers[empty]) + + # Update donor cluster center/distance + if counts[from_cluster] > 0: + centers[from_cluster], counts[from_cluster] = center_for_cluster(idx, from_cluster) + D[:, from_cluster] = distances_to_center(centers[from_cluster]) + changed = np.unique(np.concatenate([changed, np.array([from_cluster], dtype=np.int64)])) + + # Compute objective and check for improvement + totsumD = assigned_total(D, idx) + if prevtotsumD <= totsumD: + idx = previdx + counts = np.bincount(idx, minlength=2).astype(np.int64) + for c in changed: + if counts[c] > 0: + centers[c], counts[c] = center_for_cluster(idx, c) + break + + previdx = idx.copy() + prevtotsumD = totsumD + + # Reassign using nearest centroid, tie -> stay + nidx = np.argmin(D, axis=1).astype(np.int8) + dmin = D[row_indices, nidx] + moved = np.where(nidx != previdx)[0] + if moved.size: + stay_mask = D[moved, previdx[moved]] > dmin[moved] + moved = moved[stay_mask] + if moved.size == 0: + break + idx[moved] = nidx[moved] + changed = np.unique(np.concatenate([idx[moved], previdx[moved]])) + + return centers, idx.astype(np.int8) + def kmeans_segment(roi): - r = img_as_float32(roi) - # compute "dark" and "light" clusters - km = KMeans(n_clusters=2, n_init=1, init=np.array([[0], [1]]), max_iter=100) - km.fit(r.reshape(-1, 1)) - J = km.labels_ - C = km.cluster_centers_ + # Match MATLAB im2single for uint8 by explicit float32 scaling. + if roi.dtype == np.uint8: + r = roi.astype(np.float32) / np.float32(255.0) + else: + r = roi.astype(np.float32) + # Use column-major order to match MATLAB img(:) traversal. + values = r.reshape(-1, order="F") + C, J = _kmeans_1d_strict(values, max_iter=100) + C = C.reshape(-1) + J = J.reshape(-1) + + # reshape labels to image using MATLAB order + J = J.reshape(r.shape, order="F") bg_label = np.argmax(C) # find the darkest pixel value in the bright (background) cluster - roi_1d = r.ravel() - darkest_background = np.min(roi_1d[J == bg_label]) + darkest_background = np.min(r[J == bg_label]) # use it to compute a threshold threshold = darkest_background * DARK_THRESHOLD_ADJUSTMENT - # extend the background using that threshold - J[roi_1d > threshold] = bg_label + # extend the background using that threshold (MATLAB uses >) + J[r > threshold] = bg_label # return True for non-background pixels return (J != bg_label).reshape(roi.shape) def apply_blob_min(roi): - return remove_small_objects(roi, BLOB_MIN + 1, connectivity=2) + # Match MATLAB bwareaopen(img, 41): remove objects with size < 41 (keep 41+). + roi_bool = roi.astype(bool) + structure = np.ones((3, 3), dtype=bool) + labeled, num = label(roi_bool, structure=structure) + if num == 0: + return roi_bool + counts = np.bincount(labeled.ravel()) + keep_labels = np.where(counts >= (BLOB_MIN + 1))[0] + keep_labels = keep_labels[keep_labels != 0] + if keep_labels.size == 0: + return np.zeros_like(roi_bool) + return np.isin(labeled, keep_labels) def segment_roi(roi, raw_stitch=None): # step 1. phase congruency (edge detection)