Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d106257
initial feature matching fixes
johnwaalsh Feb 2, 2026
d5434ef
add old center fix
johnwaalsh Feb 4, 2026
ea732de
progress on SurfaceArea and BioVolume SOR float difference
johnwaalsh Feb 5, 2026
8800891
potential fix for kmeans behavior
johnwaalsh Feb 12, 2026
61de9a3
float precision fix
johnwaalsh Feb 12, 2026
4069c26
match MATLAB behavior for multiple blobs in one image
johnwaalsh Feb 13, 2026
487ed11
potential SOR fix
johnwaalsh Feb 13, 2026
1cfc787
switch to integer math for SOR calculation
johnwaalsh Feb 18, 2026
41e8ad4
potential float rounding fix for biovolume calculation
johnwaalsh Feb 18, 2026
8ea9435
add debug print
johnwaalsh Feb 18, 2026
25780ac
connected component fix to use 8-connected labeling like MATLAB
johnwaalsh Feb 19, 2026
4a43356
add deterministic sum for biovolume calculation
johnwaalsh Feb 20, 2026
7d0b5de
mean generation fix -- nans should not be part of the mean
johnwaalsh Feb 20, 2026
4b8fec1
add deterministic summing to heidi distmap function
johnwaalsh Feb 20, 2026
67f74f7
make heidi distmap version the default
johnwaalsh Feb 20, 2026
cae3d66
switch sum accumulator to explicitly use float32
johnwaalsh Feb 20, 2026
d0d4cb1
add explicit surface area summing
johnwaalsh Feb 24, 2026
3dc38fe
add largest 8-connected component fix
johnwaalsh Feb 25, 2026
2579531
round float64 values to float32
johnwaalsh Feb 27, 2026
e9207d4
add more explicit float32 casting
johnwaalsh Feb 27, 2026
04da5af
add zero to NaN conversion and remove unused roi param
johnwaalsh Mar 5, 2026
d3cc825
remove debug function
johnwaalsh Mar 5, 2026
34aabe4
remove debug code
johnwaalsh Mar 5, 2026
d99b22f
remove kmeans debug code
johnwaalsh Mar 5, 2026
904305f
remove sor debug code
johnwaalsh Mar 5, 2026
95f9958
remove unused imports
johnwaalsh Mar 5, 2026
76255b6
remove unused code from debugging
johnwaalsh Mar 5, 2026
f6db042
fix comment
johnwaalsh Mar 5, 2026
823a020
add explicit float rounding in kmeans
johnwaalsh May 7, 2026
5c0b805
Merge branch 'match_matlab' of github.com:WHOIGit/ifcb-features into …
johnwaalsh May 7, 2026
cf3a59b
switch from for loops to vectorization for speedup
johnwaalsh May 8, 2026
ffedc17
add more explicit orientation code
johnwaalsh May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions extract_slim_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None):

if all_features:
df = pd.DataFrame.from_records(all_features, columns=['roi_number'] + FEATURE_COLUMNS)
df.to_csv(features_output_filename, index=False)
df.to_csv(features_output_filename, index=False, float_format="%.10g")

if all_blobs:
with zipfile.ZipFile(blobs_output_filename, 'w') as zf:
Expand All @@ -130,4 +130,4 @@ def extract_and_save_all_features(data_directory, output_directory, bins=None):
extract_and_save_all_features(args.data_directory, args.output_directory, args.bins)
elapsed = time.time() - beginning

print(f'Total extract time: {elapsed:.2f} seconds')
print(f'Total extract time: {elapsed:.2f} seconds')
67 changes: 47 additions & 20 deletions ifcb_features/all.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np

from scipy.ndimage.measurements import find_objects
from scipy.spatial import QhullError

from skimage.measure import regionprops
from skimage.measure import label, regionprops

from functools import lru_cache

Expand All @@ -11,11 +12,12 @@
from .blob_geometry import (equiv_diameter, ellipse_properties,
invmoments, convex_hull, convex_hull_image,
convex_hull_properties, feret_diameter,
binary_symmetry, feret_diameters)
binary_symmetry, feret_diameters,
explicit_orientation)
from .morphology import find_perimeter
from .biovolume import (distmap_volume_surface_area,
sor_volume_surface_area)
from .perimeter import perimeter_stats, hausdorff_symmetry
from .perimeter import perimeter_stats, hausdorff_symmetry, benkrid_perimeter
from .texture import statxture, masked_pixels, texture_pixels
from .hog import image_hog
from .ringwedge import ring_wedge, _N_RINGS, _N_WEDGES
Expand Down Expand Up @@ -43,8 +45,13 @@ def size(self):
@property
@lru_cache()
def regionprops(self):
"""region props of the blob (assumes single connected region)"""
return regionprops(self.image.astype(np.uint8))[0]
"""region props of the blob (assumes single connected region)."""
labels = label(self.image.astype(np.uint8), connectivity=2)
props = regionprops(labels)
if not props:
return None
# Use the largest region to match MATLAB's 8-connected component.
return max(props, key=lambda p: p.area)
@property
@lru_cache()
def area(self):
Expand All @@ -58,7 +65,7 @@ def equiv_diameter(self):
@property
@lru_cache()
def perimeter(self):
return self.regionprops.perimeter
return benkrid_perimeter(self.perimeter_image)
@property
def area_over_perimeter_squared(self):
return self.area / self.perimeter**2
Expand All @@ -74,11 +81,20 @@ def extent(self):
@lru_cache()
def convex_hull(self):
"""vertices of convex hull of blob"""
return convex_hull(self.perimeter_points)
try:
return convex_hull(self.perimeter_points)
except QhullError:
# Colinear or degenerate perimeter points; fall back to raw points.
return np.vstack(self.perimeter_points).T
@property
@lru_cache()
def convex_hull_properties(self):
return convex_hull_properties(self.convex_hull)
hull = self.convex_hull
if hull is None or hull.shape[0] < 3:
return self.perimeter, self.area
if np.linalg.matrix_rank(hull - hull[0]) < 2:
return self.perimeter, self.area
return convex_hull_properties(hull)
@property
@lru_cache()
def convex_perimeter(self):
Expand Down Expand Up @@ -136,11 +152,7 @@ def eccentricity(self):
@lru_cache()
def orientation(self):
"""return orientation of blob in degrees"""
rad = self.ellipse_properties[3]
deg = (180/np.pi) * rad - 90
if deg < -90:
deg += 180
return deg
return explicit_orientation(self.image)
@property
@lru_cache()
def centroid(self):
Expand Down Expand Up @@ -190,7 +202,19 @@ def perimeter_points(self):
@lru_cache()
def distmap_volume_surface_area(self):
"""volume of blob computed via Moberg & Sosik algorithm"""
return distmap_volume_surface_area(self.image, self.perimeter_image)
# MATLAB uses a tight cropped blob image for distmap via regionprops.Image.
img = self.regionprops.image.astype(bool)
# Match MATLAB blob_geomprop: use largest 8-connected component.
if img.any():
from scipy.ndimage import label
labeled, num = label(img, structure=np.ones((3, 3), dtype=np.int32))
if num > 1:
counts = np.bincount(labeled.ravel())
counts[0] = 0
largest = counts.argmax()
img = labeled == largest
perim = find_perimeter(img)
return distmap_volume_surface_area(img, perim)
@property
@lru_cache()
def sor_volume_surface_area(self):
Expand Down Expand Up @@ -312,8 +336,7 @@ def blobs(self):
the segmented mask, ordered by largest area to smallest area"""
labeled, bboxes, blobs = find_blobs(self.blobs_image)
cropped_rois = [self.image[bbox] for bbox in bboxes]
# ignore 1-pixel wide/high blobs
Bs = [BlobFeatures(b,R) for b,R in zip(blobs,cropped_rois) if min(R.shape) > 1]
Bs = [BlobFeatures(b, R) for b, R in zip(blobs, cropped_rois)]
# sort by area, largest first
return sorted(Bs, key=lambda B: B.area, reverse=True)
@property
Expand Down Expand Up @@ -422,7 +445,9 @@ def ring(self):
return self.ring_wedge[3]
@lru_cache()
def summed_attr(self, attr):
return np.sum(getattr(b,attr) for b in self.blobs)
vals = np.array([getattr(b, attr) for b in self.blobs], dtype=np.float64)
# MATLAB stores blob props in double arrays and sums in double.
return float(np.sum(vals, dtype=np.float64))
@property
def summed_area(self):
return self.summed_attr('area')
Expand Down Expand Up @@ -504,9 +529,11 @@ def compute_features(roi_image, blobs_image=None, raw_stitch=None):
('summedPerimeter', r.summed_perimeter),
('summedSurfaceArea', r.summed_surface_area),
]
def _zero_to_nan(v):
return float('nan') if v == 0 else v
f += [
('Area_over_PerimeterSquared', b.area_over_perimeter_squared),
('Area_over_Perimeter', b.area_over_perimeter),
('summedConvexPerimeter_over_Perimeter', r.summed_convex_perimeter_over_perimeter),
('Area_over_PerimeterSquared', _zero_to_nan(b.area_over_perimeter_squared)),
('Area_over_Perimeter', _zero_to_nan(b.area_over_perimeter)),
('summedConvexPerimeter_over_Perimeter', _zero_to_nan(r.summed_convex_perimeter_over_perimeter)),
]
return (r.blobs_image, f)
108 changes: 66 additions & 42 deletions ifcb_features/biovolume.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np

from scipy.ndimage import distance_transform_edt
from scipy.ndimage import binary_fill_holes, distance_transform_edt

from .morphology import find_perimeter

Expand Down Expand Up @@ -44,60 +44,84 @@ def bottom_top_area(X,Y,Z,ignore_ground=False):

return area_bot, area_top

def distmap_volume_surface_area(B,perimeter_image=None):
"""Moberg & Sosik biovolume algorithm
returns volume and representative transect"""
def _det_sum32(arr):
"""Deterministic float32 sum in column-major order."""
sum_acc = np.float32(0.0)
flat = arr.ravel(order="F")
for v in flat:
sum_acc = np.float32(sum_acc + np.float32(v))
return np.float32(sum_acc)


def distmap_volume_surface_area(B, perimeter_image=None):
"""Moberg & Sosik biovolume algorithm (Heidi_explore implementation).
Returns volume, representative transect, and surface area."""
if perimeter_image is None:
perimeter_image = find_perimeter(B)
# elementwise distance to perimeter + 1
D = distance_transform_edt(1-perimeter_image) + 1
# mask distances outside blob
D = D * (B>0)
Dm = np.ma.array(D,mask=1-B)
# representative transect
x = 4 * np.ma.mean(Dm) - 2
# diamond correction
c1 = x**2 / (x**2 + 2*x + 0.5)
# circle correction
# c2 = np.pi / 2
# volume = c1 * c2 * 2 * np.sum(D)
volume = c1 * np.pi * np.sum(D)
# calculate distance map (MATLAB bwdist)
D = distance_transform_edt(1 - perimeter_image) + 1
# mask distances outside filled perimeter
fill = binary_fill_holes(np.array(perimeter_image, dtype=bool))
D = D.astype(np.float64)
D[~fill] = np.nan
# deterministic sum/mean over column-major order to match MATLAB loop
flat = D.ravel(order="F")
sum_acc = np.float32(0.0)
cnt = 0
for v in flat:
if not np.isnan(v):
sum_acc = np.float32(sum_acc + np.float32(v))
cnt += 1
sum_val = np.float32(sum_acc)
mean_val = np.float32(sum_acc / np.float32(cnt)) if cnt else np.float32(np.nan)
# representative transect length
x = np.float32(4.0) * mean_val - np.float32(2.0)
# correction factors
c1 = (x**2) / (x**2 + np.float32(2.0) * x + np.float32(0.5))
c2 = np.float32(np.pi / 2.0)
volume = np.float32(c1 * c2 * np.float32(2.0) * sum_val)
# surface area
h, w = D.shape
Y, X = np.mgrid[0:h,0:w]
area_bot, area_top = bottom_top_area(X,Y,D,ignore_ground=True)
# final correction of the diamond cross-section
# inherent in the distance map to be circular instead
c = (np.pi * x / 2) / (2 * np.sqrt(2) * x / 2 + (1 + np.sqrt(2)) / 2)
sa = 2 * c * (np.sum(area_bot) + np.sum(area_top))
# return volume, representative transect, and surface area
D_sa = np.nan_to_num(D, nan=0.0).astype(np.float32, copy=False)
h, w = D_sa.shape
Y, X = np.mgrid[1:h + 1, 1:w + 1]
X = X.astype(np.float32, copy=False)
Y = Y.astype(np.float32, copy=False)
area_bot, area_top = bottom_top_area(X, Y, D_sa, ignore_ground=True)
# compute c fully in float32 to match MATLAB single-precision path
c = (np.float32(np.pi) * np.float32(x) / np.float32(2.0)) / (
np.float32(2.0) * np.float32(np.sqrt(2.0)) * np.float32(x) / np.float32(2.0)
+ (np.float32(1.0) + np.float32(np.sqrt(2.0))) / np.float32(2.0)
)
sum_bot = _det_sum32(area_bot.astype(np.float32))
sum_top = _det_sum32(area_top.astype(np.float32))
sa = np.float32(2.0) * np.float32(c) * np.float32(sum_bot + sum_top)
return volume, x, sa

def sor_volume_surface_area(B):
"""pass in rotated blob"""
"""Sosik and Kilfoyle surface area / volume algorithm"""
# find the bottom point of the circle for each column
m = np.argmax(B,axis=0)
# compute the radius of each slice
d = np.sum(B,axis=0)
# exclude 0s
r = (d/2.)[d>0]
m = m[d>0]
# compute center as top-edge + radius to match MATLAB
r = np.sum(B, axis=0).astype(np.float64)
ri = r > 0
r = (r / 2.0)[ri]
y1 = np.argmax(B, axis=0).astype(np.float64) + 1.0
y1 = y1[ri]
center = y1 + r
n_slices = r.size
# compute 721 angles between 0 and 180 degrees inclusive, in radians
n_angles = 721
angR = np.linspace(0,180,n_angles) * (np.pi / 180)
# compute angles between 0 and 180 degrees inclusive, in radians
da = 0.25
angvec = np.arange(0, 180 + da / 2, da, dtype=np.float64)
n_angles = angvec.size
angR = angvec * (np.pi / 180)

# make everything the same shape: (nslices, nangles)
angR = np.vstack([angR] * m.size)
m = np.vstack([m] * n_angles).T
angR = np.vstack([angR] * n_slices)
r = np.vstack([r] * n_angles).T

# compute the center of each slice
center = m + r
center = np.vstack([center] * n_angles).T
# correct for edge effects
center[0,:]=center[1,:]
center[-1,:]=center[-2,:]
if n_slices >= 2:
center[0, :] = center[1, :]
center[-1, :] = center[-2, :]

# y coordinates of all angles on all slices
Y = center + np.cos(angR) * r
Expand Down
43 changes: 42 additions & 1 deletion ifcb_features/blob_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,48 @@ def ellipse_properties(B):

return maj_axis, min_axis, ecc, orientation


def explicit_orientation(B):
"""Deterministic blob orientation in degrees for MATLAB/Python parity."""
B = np.array(B).astype(np.bool)
rows, cols = np.indices(B.shape, dtype=np.float64)
x = (cols + 1.0).ravel(order='C')
y = (rows + 1.0).ravel(order='C')
f = B.astype(np.float64).ravel(order='C')

m00 = np.float64(0.0)
m10 = np.float64(0.0)
m01 = np.float64(0.0)
for xi, yi, fi in zip(x, y, f):
m00 = np.float64(m00 + np.float64(fi))
m10 = np.float64(m10 + np.float64(xi * fi))
m01 = np.float64(m01 + np.float64(yi * fi))

if m00 == 0:
return 0.0

xbar = np.float64(m10 / m00)
ybar = np.float64(m01 / m00)

mu20 = np.float64(0.0)
mu02 = np.float64(0.0)
mu11 = np.float64(0.0)
for xi, yi, fi in zip(x, y, f):
dx = np.float64(xi - xbar)
dy = np.float64(yi - ybar)
mu20 = np.float64(mu20 + np.float64(dx * dx * fi))
mu02 = np.float64(mu02 + np.float64(dy * dy * fi))
mu11 = np.float64(mu11 + np.float64(dx * dy * fi))

theta = np.float64(-0.5) * np.float64(
np.degrees(np.arctan2(np.float64(2.0) * mu11, mu20 - mu02))
)
while theta > 90.0:
theta = np.float64(theta - 180.0)
while theta <= -90.0:
theta = np.float64(theta + 180.0)
return float(theta)

def invmoments(B):
"""compute invariant moments. see
Digital Image Processing Using MATLAB, ch. 11"""
Expand Down Expand Up @@ -181,4 +223,3 @@ def ss(D):
# flipped across horizontal (major) axis
bflip = ss(np.flipud(B))
return b180, b90, bflip

Loading
Loading