Skip to content

Commit 32f785f

Browse files
authored
Further tests and delete chunk data after resize (#49)
* add tests * test resize and reopen array * add resizeMetadataOnly argument for resize * reformat * default to parallel read, write * resize default to parallel and resizeMetadataOnly * update tests with long
1 parent 4e4eea9 commit 32f785f

9 files changed

Lines changed: 650 additions & 19 deletions

File tree

src/main/java/dev/zarr/zarrjava/core/Array.java

Lines changed: 147 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import dev.zarr.zarrjava.ZarrException;
44
import dev.zarr.zarrjava.core.codec.CodecPipeline;
55
import dev.zarr.zarrjava.store.FilesystemStore;
6-
import dev.zarr.zarrjava.store.Store;
76
import dev.zarr.zarrjava.store.StoreHandle;
87
import dev.zarr.zarrjava.utils.IndexingUtils;
98
import dev.zarr.zarrjava.utils.MultiArrayUtils;
@@ -17,14 +16,12 @@
1716
import java.nio.file.Path;
1817
import java.nio.file.Paths;
1918
import java.util.Arrays;
20-
import java.util.List;
21-
import java.util.Set;
22-
import java.util.stream.Collectors;
2319
import java.util.stream.Stream;
2420

2521
public abstract class Array extends AbstractNode {
2622

2723
// Codec pipeline used by readChunk to decode stored chunk bytes.
protected CodecPipeline codecPipeline;
24+
/**
 * Parallelism flag passed on by the {@code write}, {@code read} and {@code resize} overloads
 * that do not take an explicit {@code parallel} argument.
 */
public static final boolean DEFAULT_PARALLELISM = true;
2825

2926
protected Array(StoreHandle storeHandle) throws ZarrException {
3027
super(storeHandle);
@@ -184,6 +181,110 @@ public ucar.ma2.Array readChunk(long[] chunkCoords) throws ZarrException {
184181
return codecPipeline.decode(chunkBytes);
185182
}
186183

184+
/**
185+
* Deletes chunks that are completely outside the new shape and trims boundary chunks.
186+
*
187+
* @param newShape the new shape of the array
188+
* @param parallel utilizes parallelism if true
189+
*/
190+
protected void cleanupChunksForResize(long[] newShape, boolean parallel) {
191+
ArrayMetadata metadata = metadata();
192+
final int[] chunkShape = metadata.chunkShape();
193+
final int ndim = metadata.ndim();
194+
final dev.zarr.zarrjava.core.chunkkeyencoding.ChunkKeyEncoding chunkKeyEncoding = metadata.chunkKeyEncoding();
195+
196+
// Calculate max valid chunk coordinates for the new shape
197+
long[] newMaxChunkCoords = new long[ndim];
198+
for (int i = 0; i < ndim; i++) {
199+
newMaxChunkCoords[i] = (newShape[i] + chunkShape[i] - 1) / chunkShape[i];
200+
}
201+
202+
// Iterate over all possible chunk coordinates in the old shape
203+
long[][] allOldChunkCoords = IndexingUtils.computeChunkCoords(metadata.shape, chunkShape);
204+
205+
Stream<long[]> chunkStream = Arrays.stream(allOldChunkCoords);
206+
if (parallel) {
207+
chunkStream = chunkStream.parallel();
208+
}
209+
210+
chunkStream.forEach(chunkCoords -> {
211+
boolean isOutsideBounds = false;
212+
boolean isOnBoundary = false;
213+
214+
for (int dimIdx = 0; dimIdx < ndim; dimIdx++) {
215+
if (chunkCoords[dimIdx] >= newMaxChunkCoords[dimIdx]) {
216+
isOutsideBounds = true;
217+
break;
218+
}
219+
// Check if this chunk is on the boundary (partially outside new shape)
220+
long chunkEnd = (chunkCoords[dimIdx] + 1) * chunkShape[dimIdx];
221+
if (chunkEnd > newShape[dimIdx]) {
222+
isOnBoundary = true;
223+
}
224+
}
225+
226+
String[] chunkKeys = chunkKeyEncoding.encodeChunkKey(chunkCoords);
227+
StoreHandle chunkHandle = storeHandle.resolve(chunkKeys);
228+
229+
if (isOutsideBounds) {
230+
// Delete chunk that is completely outside
231+
chunkHandle.delete();
232+
} else if (isOnBoundary) {
233+
// Trim boundary chunk - read, clear out-of-bounds data, write back
234+
try {
235+
trimBoundaryChunk(chunkCoords, newShape, chunkShape);
236+
} catch (ZarrException e) {
237+
throw new RuntimeException(e);
238+
}
239+
}
240+
});
241+
}
242+
243+
/**
244+
* Trims a boundary chunk by reading it, clearing the out-of-bounds portion, and writing it back.
245+
*
246+
* @param chunkCoords the coordinates of the chunk to trim
247+
* @param newShape the new shape of the array
248+
* @param chunkShape the shape of the chunks
249+
* @throws ZarrException if reading or writing the chunk fails
250+
*/
251+
protected void trimBoundaryChunk(long[] chunkCoords, long[] newShape, int[] chunkShape) throws ZarrException {
252+
ArrayMetadata metadata = metadata();
253+
final int ndim = metadata.ndim();
254+
255+
// Calculate the valid region within this chunk
256+
int[] validShape = new int[ndim];
257+
boolean needsTrimming = false;
258+
for (int dimIdx = 0; dimIdx < ndim; dimIdx++) {
259+
long chunkStart = chunkCoords[dimIdx] * chunkShape[dimIdx];
260+
long chunkEnd = chunkStart + chunkShape[dimIdx];
261+
if (chunkEnd > newShape[dimIdx]) {
262+
validShape[dimIdx] = (int) (newShape[dimIdx] - chunkStart);
263+
needsTrimming = true;
264+
} else {
265+
validShape[dimIdx] = chunkShape[dimIdx];
266+
}
267+
}
268+
269+
if (!needsTrimming) {
270+
return;
271+
}
272+
273+
// Read the existing chunk
274+
ucar.ma2.Array chunkData = readChunk(chunkCoords);
275+
276+
// Create a new chunk filled with fill value
277+
ucar.ma2.Array newChunkData = metadata.allocateFillValueChunk();
278+
279+
// Copy only the valid region
280+
MultiArrayUtils.copyRegion(
281+
chunkData, new int[ndim], newChunkData, new int[ndim], validShape
282+
);
283+
284+
// Write the trimmed chunk back
285+
writeChunk(chunkCoords, newChunkData);
286+
}
287+
187288

188289
/**
189290
* Writes a ucar.ma2.Array into the Zarr array at the beginning of the Zarr array. The shape of
@@ -205,7 +306,7 @@ public void write(ucar.ma2.Array array) {
205306
* @param array the data to write
206307
*/
207308
public void write(long[] offset, ucar.ma2.Array array) {
208-
write(offset, array, false);
309+
// Delegates to the overload with an explicit parallel flag, passing DEFAULT_PARALLELISM.
write(offset, array, DEFAULT_PARALLELISM);
209310
}
210311

211312
/**
@@ -240,7 +341,7 @@ public ucar.ma2.Array read() throws ZarrException {
240341
*/
241342
@Nonnull
242343
public ucar.ma2.Array read(final long[] offset, final long[] shape) throws ZarrException {
243-
return read(offset, shape, false);
344+
// Delegates to the overload with an explicit parallel flag, passing DEFAULT_PARALLELISM.
return read(offset, shape, DEFAULT_PARALLELISM);
244345
}
245346

246347
/**
@@ -339,6 +440,46 @@ public ucar.ma2.Array read(final long[] offset, final long[] shape, final boolea
339440
return outputArray;
340441
}
341442

443+
/**
 * Sets a new shape for the Zarr array, updating only the metadata. This overload delegates to
 * {@link #resize(long[], boolean)} with {@code resizeMetadataOnly} set to {@code true}. This
 * method returns a new instance of the Zarr array class and the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
452+
public Array resize(long[] newShape) throws ZarrException, IOException {
453+
return resize(newShape, true);
454+
}
455+
456+
/**
 * Sets a new shape for the Zarr array. This overload delegates to
 * {@link #resize(long[], boolean, boolean)} with {@code parallel} set to
 * {@link #DEFAULT_PARALLELISM}. This method returns a new instance of the Zarr array class and
 * the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
 *                           bounds are deleted and boundary chunks are trimmed
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
466+
public Array resize(long[] newShape, boolean resizeMetadataOnly) throws ZarrException, IOException {
467+
return resize(newShape, resizeMetadataOnly, DEFAULT_PARALLELISM);
468+
}
469+
470+
/**
 * Sets a new shape for the Zarr array. This method returns a new instance of the Zarr array class
 * and the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
 *                           bounds are deleted and boundary chunks are trimmed
 * @param parallel utilizes parallelism if true when cleaning up chunks
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
481+
public abstract Array resize(long[] newShape, boolean resizeMetadataOnly, boolean parallel) throws ZarrException, IOException;
482+
342483
/**
 * Creates an {@link ArrayAccessor} for this array.
 *
 * @return a new accessor constructed with this array instance
 */
public ArrayAccessor access() {
343484
return new ArrayAccessor(this);
344485
}

src/main/java/dev/zarr/zarrjava/v2/Array.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,26 +201,63 @@ private Array writeMetadata(ArrayMetadata newArrayMetadata) throws ZarrException
201201
}
202202

203203
/**
 * Sets a new shape for the Zarr array, updating only the metadata. This overload delegates to
 * {@link #resize(long[], boolean)} with {@code resizeMetadataOnly} set to {@code true}. This
 * method returns a new instance of the Zarr array class and the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
212+
@Override
212213
public Array resize(long[] newShape) throws ZarrException, IOException {
214+
return resize(newShape, true);
215+
}
216+
217+
/**
 * Sets a new shape for the Zarr array. This overload delegates to
 * {@link #resize(long[], boolean, boolean)} with {@code parallel} set to
 * {@code DEFAULT_PARALLELISM}. This method returns a new instance of the Zarr array class and
 * the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
 *                           bounds are deleted and boundary chunks are trimmed
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
227+
@Override
228+
public Array resize(long[] newShape, boolean resizeMetadataOnly) throws ZarrException, IOException {
229+
return resize(newShape, resizeMetadataOnly, DEFAULT_PARALLELISM);
230+
}
231+
232+
/**
233+
* Sets a new shape for the Zarr array. This method returns a new instance of the Zarr array class
234+
* and the old instance becomes invalid.
235+
*
236+
* @param newShape the new shape of the Zarr array
237+
* @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
238+
* bounds are deleted and boundary chunks are trimmed
239+
* @param parallel utilizes parallelism if true when cleaning up chunks
240+
* @throws ZarrException if the new metadata is invalid
241+
* @throws IOException throws IOException if the new metadata cannot be serialized
242+
*/
243+
@Override
244+
public Array resize(long[] newShape, boolean resizeMetadataOnly, boolean parallel) throws ZarrException, IOException {
213245
if (newShape.length != metadata.ndim()) {
214246
throw new IllegalArgumentException(
215247
"'newShape' needs to have rank '" + metadata.ndim() + "'.");
216248
}
217249

250+
if (!resizeMetadataOnly) {
251+
cleanupChunksForResize(newShape, parallel);
252+
}
253+
218254
ArrayMetadata newArrayMetadata = ArrayMetadataBuilder.fromArrayMetadata(metadata)
219255
.withShape(newShape)
220256
.build();
221257
return writeMetadata(newArrayMetadata);
222258
}
223259

260+
224261
/**
225262
* Sets the attributes of the Zarr array. It overwrites and removes any existing attributes. This
226263
* method returns a new instance of the Zarr array class and the old instance becomes invalid.
@@ -248,7 +285,8 @@ public Array setAttributes(Attributes newAttributes) throws ZarrException, IOExc
248285
* @throws IOException throws IOException if the new metadata cannot be serialized
249286
*/
250287
public Array updateAttributes(Function<Attributes, Attributes> attributeMapper) throws ZarrException, IOException {
251-
return setAttributes(attributeMapper.apply(metadata.attributes));
288+
// Hand the mapper a new Attributes object built from the current attributes, or an empty one
// when metadata.attributes is null, instead of passing metadata.attributes itself.
Attributes currentAttributes = metadata.attributes != null ? new Attributes(metadata.attributes) : new Attributes();
289+
return setAttributes(attributeMapper.apply(currentAttributes));
252290
}
253291

254292
@Override

src/main/java/dev/zarr/zarrjava/v2/Group.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
public class Group extends dev.zarr.zarrjava.core.Group implements Node {
2828
public GroupMetadata metadata;
2929

30-
protected Group(@Nonnull StoreHandle storeHandle, @Nonnull GroupMetadata groupMetadata) throws IOException {
30+
// Passes the store handle to the superclass constructor and stores the given group metadata.
protected Group(@Nonnull StoreHandle storeHandle, @Nonnull GroupMetadata groupMetadata) {
3131
super(storeHandle);
3232
this.metadata = groupMetadata;
3333
}
@@ -283,7 +283,8 @@ public Group setAttributes(Attributes newAttributes) throws ZarrException, IOExc
283283
*/
284284
public Group updateAttributes(Function<Attributes, Attributes> attributeMapper)
285285
throws ZarrException, IOException {
286-
return setAttributes(attributeMapper.apply(metadata.attributes));
286+
// Hand the mapper a new Attributes object built from the current attributes, or an empty one
// when metadata.attributes is null, instead of passing metadata.attributes itself.
Attributes currentAttributes = metadata.attributes != null ? new Attributes(metadata.attributes) : new Attributes();
287+
return setAttributes(attributeMapper.apply(currentAttributes));
287288
}
288289

289290

src/main/java/dev/zarr/zarrjava/v3/Array.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,26 +201,63 @@ private Array writeMetadata(ArrayMetadata newArrayMetadata) throws ZarrException
201201
}
202202

203203
/**
 * Sets a new shape for the Zarr array, updating only the metadata. This overload delegates to
 * {@link #resize(long[], boolean)} with {@code resizeMetadataOnly} set to {@code true}. This
 * method returns a new instance of the Zarr array class and the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
212+
@Override
212213
public Array resize(long[] newShape) throws ZarrException, IOException {
214+
return resize(newShape, true);
215+
}
216+
217+
/**
 * Sets a new shape for the Zarr array. This overload delegates to
 * {@link #resize(long[], boolean, boolean)} with {@code parallel} set to
 * {@code DEFAULT_PARALLELISM}. This method returns a new instance of the Zarr array class and
 * the old instance becomes invalid.
 *
 * @param newShape the new shape of the Zarr array
 * @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
 *                           bounds are deleted and boundary chunks are trimmed
 * @return the new instance of the Zarr array
 * @throws ZarrException if the new metadata is invalid
 * @throws IOException if the new metadata cannot be serialized
 */
227+
@Override
228+
public Array resize(long[] newShape, boolean resizeMetadataOnly) throws ZarrException, IOException {
229+
return resize(newShape, resizeMetadataOnly, DEFAULT_PARALLELISM);
230+
}
231+
232+
/**
233+
* Sets a new shape for the Zarr array. This method returns a new instance of the Zarr array class
234+
* and the old instance becomes invalid.
235+
*
236+
* @param newShape the new shape of the Zarr array
237+
* @param resizeMetadataOnly if true, only the metadata is updated; if false, chunks outside the new
238+
* bounds are deleted and boundary chunks are trimmed
239+
* @param parallel utilizes parallelism if true when cleaning up chunks
240+
* @throws ZarrException if the new metadata is invalid
241+
* @throws IOException throws IOException if the new metadata cannot be serialized
242+
*/
243+
@Override
244+
public Array resize(long[] newShape, boolean resizeMetadataOnly, boolean parallel) throws ZarrException, IOException {
213245
if (newShape.length != metadata.ndim()) {
214246
throw new IllegalArgumentException(
215247
"'newShape' needs to have rank '" + metadata.ndim() + "'.");
216248
}
217249

250+
if (!resizeMetadataOnly) {
251+
cleanupChunksForResize(newShape, parallel);
252+
}
253+
218254
ArrayMetadata newArrayMetadata = ArrayMetadataBuilder.fromArrayMetadata(metadata)
219255
.withShape(newShape)
220256
.build();
221257
return writeMetadata(newArrayMetadata);
222258
}
223259

260+
224261
/**
225262
* Sets the attributes of the Zarr array. It overwrites and removes any existing attributes. This
226263
* method returns a new instance of the Zarr array class and the old instance becomes invalid.
@@ -248,7 +285,8 @@ public Array setAttributes(Attributes newAttributes) throws ZarrException, IOExc
248285
* @throws IOException throws IOException if the new metadata cannot be serialized
249286
*/
250287
public Array updateAttributes(Function<Attributes, Attributes> attributeMapper) throws ZarrException, IOException {
251-
return setAttributes(attributeMapper.apply(metadata.attributes));
288+
// Hand the mapper a new Attributes object built from the current attributes, or an empty one
// when metadata.attributes is null, instead of passing metadata.attributes itself.
Attributes currentAttributes = metadata.attributes != null ? new Attributes(metadata.attributes) : new Attributes();
289+
return setAttributes(attributeMapper.apply(currentAttributes));
252290
}
253291

254292
@Override

src/main/java/dev/zarr/zarrjava/v3/Group.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,8 @@ private Group writeMetadata(GroupMetadata newGroupMetadata) throws IOException {
289289
* @throws IOException if the metadata cannot be serialized
290290
*/
291291
public Group updateAttributes(Function<Attributes, Attributes> attributeMapper) throws ZarrException, IOException {
292-
return setAttributes(attributeMapper.apply(metadata.attributes));
292+
// Hand the mapper a new Attributes object built from the current attributes, or an empty one
// when metadata.attributes is null, instead of passing metadata.attributes itself.
Attributes currentAttributes = metadata.attributes != null ? new Attributes(metadata.attributes) : new Attributes();
293+
return setAttributes(attributeMapper.apply(currentAttributes));
293294
}
294295

295296
/**

0 commit comments

Comments
 (0)