From dbe6ce8d01f86e28be6287e4c0e3d5659bf88d01 Mon Sep 17 00:00:00 2001 From: indecisiveuser Date: Tue, 10 Mar 2026 16:07:31 -0400 Subject: [PATCH 1/4] fixes downsampling retry logic --- .../net/preibisch/bigstitcher/spark/SparkFusion.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java b/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java index 6f3644e..e3f4118 100644 --- a/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java +++ b/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java @@ -809,7 +809,7 @@ else if ( dataType == DataType.UINT16 ) { final int s = level; - final List allBlocks = Grid.create( + List allBlocks = Grid.create( new long[] { mrInfo[ level ].dimensions[ 0 ], mrInfo[ level ].dimensions[ 1 ], mrInfo[ level ].dimensions[ 2 ] }, computeBlockSize, blockSize); @@ -868,7 +868,7 @@ else if ( dataType == DataType.UINT16 ) // extract all blocks that failed final Set failedBlocksSet = - retryTrackerDS.processWithSpark( rddDSResult, grid ); + retryTrackerDS.processWithSpark( rddDSResult, allBlocks ); // Use RetryTracker to handle retry counting and removal if (!retryTrackerDS.processFailures(failedBlocksSet)) @@ -877,11 +877,11 @@ else if ( dataType == DataType.UINT16 ) System.exit( 1 ); } - // Update grid for next iteration with remaining failed blocks - grid.clear(); - grid.addAll(failedBlocksSet); + // Update allBlocks for next iteration with remaining failed blocks + allBlocks.clear(); + allBlocks.addAll(failedBlocksSet); } - while ( grid.size() > 0 ); + while ( allBlocks.size() > 0 ); System.out.println( new Date( System.currentTimeMillis() ) + ": Saved level s " + level + ", took: " + (System.currentTimeMillis() - time ) + " ms." ); } From 6fc07cf8327eaf033af4e48c3132367164823ad7 Mon Sep 17 00:00:00 2001 From: indecisiveuser Date: Tue, 24 Mar 2026 17:48:13 -0400 Subject: [PATCH 2/4] blockscalestring only available if not using sharding (fusion) --- .../java/net/preibisch/bigstitcher/spark/SparkFusion.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java b/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java index e3f4118..fa91e14 100644 --- a/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java +++ b/src/main/java/net/preibisch/bigstitcher/spark/SparkFusion.java @@ -409,9 +409,11 @@ else if ( viewIdsGlobal = Import.getViewIds( dataGlobal ); } - final int[] blocksPerJob = Import.csvStringToIntArray(blockScaleString); + final int[] blocksPerJob = useSharding ? null : Import.csvStringToIntArray(blockScaleString); System.out.println( "Fusing: " + boundingBox.getTitle() + ": " + Util.printInterval( boundingBox ) + - " with blocksize " + Util.printCoordinates( blockSize ) + " and " + Util.printCoordinates( blocksPerJob ) + " blocks per job/shard" ); + " with blocksize " + Util.printCoordinates( blockSize ) + ( useSharding + ? " and shard size " + Util.printCoordinates( shardSize ) + : " and " + Util.printCoordinates( blocksPerJob ) + " blocks per job" ) ); if ( dataType == DataType.UINT8 ) System.out.println( "Fusing to UINT8, min intensity = " + minIntensity + ", max intensity = " + maxIntensity ); From 192a61d2cc30846d16a16bbaa81f06cfeb37195e Mon Sep 17 00:00:00 2001 From: indecisiveuser Date: Wed, 25 Mar 2026 15:00:48 -0400 Subject: [PATCH 3/4] added timing stat logging to IP detection --- .../bigstitcher/spark/SparkInterestPointDetection.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java b/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java index b245158..c2dab9a 100644 --- a/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java +++ b/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java @@ -905,9 +905,13 @@ public Void call() throws Exception final String params = "DOG (Spark) s=" + sigma + " t=" + threshold + " overlappingOnly=" + overlappingOnly + " min=" + findMin + " max=" + findMax + " downsampleXY=" + downsampleXY + " downsampleZ=" + downsampleZ + " minIntensity=" + minIntensity + " maxIntensity=" + maxIntensity; + long timeAddIP = System.currentTimeMillis(); InterestPointTools.addInterestPoints( dataGlobal, label, interestPoints, params ); + System.out.println( "addInterestPoints took " + ( System.currentTimeMillis() - timeAddIP ) + " ms." ); + long timeSaveXML = System.currentTimeMillis(); new XmlIoSpimData2().save( dataGlobal, xmlURI ); + System.out.println( "Saving XML took " + ( System.currentTimeMillis() - timeSaveXML ) + " ms." ); // store image intensities for interest points if( storeIntensities ) @@ -916,6 +920,7 @@ public Void call() throws Exception { try { + long timeIntensities = System.currentTimeMillis(); System.out.println( "Retrieving intensities for interest points '" + label + "' for " + Group.pvid(viewId) + " ... " ); final InterestPointsN5 i = (InterestPointsN5)dataGlobal.getViewInterestPoints().getViewInterestPointLists( viewId ).getInterestPointList( label ); @@ -952,8 +957,8 @@ public Void call() throws Exception N5Utils.save( intensityData, n5Writer, datasetIntensities, new int[] { 1, InterestPointsN5.defaultBlockSize }, new ZstandardCompression() ); } - System.out.println( "Saved: " + tempURI + "/" + datasetIntensities ); - + System.out.println( "Saved: " + tempURI + "/" + datasetIntensities + " (took " + ( System.currentTimeMillis() - timeIntensities ) + " ms)" ); + } catch ( Exception e ) { From f3576235f711a881f6ca233da4d28762cb4cb002 Mon Sep 17 00:00:00 2001 From: indecisiveuser Date: Wed, 25 Mar 2026 15:10:36 -0400 Subject: [PATCH 4/4] only build KDTree once (IP detection) --- .../spark/SparkInterestPointDetection.java | 72 ++++++++++++------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java b/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java index c2dab9a..9054db9 100644 --- a/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java +++ b/src/main/java/net/preibisch/bigstitcher/spark/SparkInterestPointDetection.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; import java.util.Random; import java.util.concurrent.Callable; @@ -63,6 +64,7 @@ import net.imglib2.img.array.ArrayImgs; import net.imglib2.interpolation.randomaccess.NLinearInterpolatorFactory; import net.imglib2.neighborsearch.NearestNeighborSearchOnKDTree; +import net.imglib2.neighborsearch.RadiusNeighborSearchOnKDTree; import net.imglib2.position.FunctionRandomAccessible; import net.imglib2.realtransform.AffineTransform3D; import net.imglib2.type.numeric.RealType; @@ -822,41 +824,63 @@ public Void call() throws Exception else intensitiesList = null; - // combine points since overlapping areas might exist + // combine points since overlapping areas might exist; + // collect all candidates first so we can build the KDTree only once per view + final ArrayList< InterestPoint > candidates = new ArrayList<>(); + final ArrayList< Double > candidateIntensities = ( storeIntensities || maxSpots > 0 ) ? new ArrayList<>() : null; + for ( int l = 0; l < ipsList.size(); ++l ) { - final List< InterestPoint > ips = ipsList.get( l ); - final List< Double > intensities; + candidates.addAll( ipsList.get( l ) ); if ( storeIntensities || maxSpots > 0 ) - intensities = intensitiesList.get( l ); - else - intensities = null; + candidateIntensities.addAll( intensitiesList.get( l ) ); + } - if ( !overlappingOnly || myIps.size() == 0 ) - { - myIps.addAll( ips ); + if ( !overlappingOnly || ipsList.size() <= 1 ) + { + // no inter-block deduplication needed + myIps.addAll( candidates ); - if ( storeIntensities || maxSpots > 0 ) - myIntensities.addAll( intensities ); - } - else + if ( storeIntensities || maxSpots > 0 ) + myIntensities.addAll( candidateIntensities ); + } + else if ( !candidates.isEmpty() ) + { + // build KDTree ONCE over all candidates and do a single greedy dedup pass; + // earlier-block points always win: for each accepted point, mark all + // later-indexed points within combineDistance as duplicates + final KDTree< InterestPoint > tree = new KDTree<>( candidates, candidates ); + final RadiusNeighborSearchOnKDTree< InterestPoint > search = new RadiusNeighborSearchOnKDTree<>( tree ); + + // map object identity → index in candidates for O(1) lookup after radius search + final IdentityHashMap< InterestPoint, Integer > indexMap = new IdentityHashMap<>( candidates.size() ); + for ( int i = 0; i < candidates.size(); ++i ) + indexMap.put( candidates.get( i ), i ); + + final boolean[] isDuplicate = new boolean[ candidates.size() ]; // default false + + for ( int i = 0; i < candidates.size(); ++i ) { - final KDTree< InterestPoint > tree = new KDTree<>(myIps, myIps); - final NearestNeighborSearchOnKDTree< InterestPoint > search = new NearestNeighborSearchOnKDTree<>( tree ); + if ( isDuplicate[ i ] ) continue; - for ( int i = 0; i < ips.size(); ++i ) + search.search( candidates.get( i ), combineDistance, false ); + for ( int k = 0; k < search.numNeighbors(); ++k ) { - final InterestPoint ip = ips.get( i ); - search.search( ip ); + final Integer j = indexMap.get( search.getSampler( k ).get() ); + if ( j != null && j > i ) + isDuplicate[ j ] = true; + } + } - if ( search.getDistance() > combineDistance ) - { - myIps.add( ip ); + for ( int i = 0; i < candidates.size(); ++i ) + { + if ( !isDuplicate[ i ] ) + { + myIps.add( candidates.get( i ) ); - if ( storeIntensities || maxSpots > 0 ) - myIntensities.add( intensities.get( i ) ); - } + if ( storeIntensities || maxSpots > 0 ) + myIntensities.add( candidateIntensities.get( i ) ); } } }