Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
60ba8b0
add config option to enable chunking feature
khou2020 Dec 4, 2022
f89b83f
add chunk drivere
khou2020 Dec 4, 2022
842283c
add ncmpi_var_get/set_chunk and ncmpi_var_get/set_filter
khou2020 Dec 4, 2022
9b7e90c
enable chunk driver when chunking hint is set
khou2020 Dec 4, 2022
1279cd8
rename default filter hint to nc_chunk_default_filter
khou2020 Dec 5, 2022
5d89f43
add documentation about chunked I/O driver
khou2020 Dec 5, 2022
809a408
bugfix:chunk driver build condition is ENABLE_CHUNKING
khou2020 Dec 5, 2022
b4084d0
always build chunk related APIs
khou2020 Dec 5, 2022
69650bc
bug fix: add ncchunkio to DIST_DIR
khou2020 Dec 5, 2022
bda1f75
bugfix: add new m4 scripts to extra dist
khou2020 Dec 5, 2022
5331435
bugfix: add ncchkioi_profile_timers.m4 to extra_dist
khou2020 Dec 5, 2022
349f61a
bugfix: add nchunk io to m4 include
khou2020 Dec 5, 2022
0c508de
define ENABLE_COMPRESSION if --enable-zlib or --enable-sz is set
wkliao May 8, 2025
e245b4f
add example of using chunking and compression
wkliao May 8, 2025
13bc324
run chunk_compress.c only when compression is enabled
wkliao May 8, 2025
dcc706e
Remove unused variable, initialize err to NC_NOERR
wkliao May 24, 2025
e3dbcf3
add chunking example, examples/C/chunk_io.c
wkliao Jun 19, 2025
bf0766c
Add verbose to print which processes perform compression/decompression
wkliao Jun 19, 2025
b3cc689
comment out unused printf
wkliao Jun 19, 2025
b78c8a7
test: add examples/C/chunk_2D.c which uses 2D checkerboard partitioni…
wkliao Jun 20, 2025
2bcd2d9
examples/C/chunk_2D.c one wait_all for all nonblocking reads
wkliao Jun 20, 2025
246835c
chunk_2D.c: fix use of NX by mistake
wkliao Jul 1, 2025
e129914
use MPI_COMM_SELF to call MPI_Pack and MPI_Unpack
wkliao Jul 1, 2025
852673d
bug fix: sbuf index, array of size nsend
wkliao Jul 1, 2025
8f09513
chunk: fix printing expected number of records
wkliao Jul 9, 2025
5144445
chunk_2D.c add partitioning along time for decompression
wkliao Jul 9, 2025
a1f27c2
bug fix: all ranks participate wait call whenin collective data mode
wkliao Sep 9, 2025
5ee20a3
add test program for chunking nonblocking API call
wkliao Sep 9, 2025
be25547
new example program for chunking/compression feature: chunk_compress_…
wkliao Feb 16, 2026
4324dca
rebase master branch
wkliao Feb 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ EXTRA_DIST = COPYRIGHT \
README \
RELEASE_NOTES \
m4/foreach.m4 \
m4/foreach_idx.m4 \
m4/list_len.m4 \
m4/utils.m4

# Below is a trick to build all test executables, without running them
Expand Down
133 changes: 133 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ dnl AH_TEMPLATE([ENABLE_IN_PLACE_SWAP], [Define if to enable in-place byte swap]
dnl AH_TEMPLATE([DISABLE_IN_PLACE_SWAP],[Define if to disable in-place byte swap])
AH_TEMPLATE([ENABLE_SUBFILING], [Define if to enable subfiling feature])
AH_TEMPLATE([ENABLE_NETCDF4], [Define if to enable NetCDF-4 support])
dnl config.h templates for the chunking feature and its two filters. The
dnl matching AC_DEFINE calls are made further below when --enable-chunking,
dnl --enable-zlib, and --enable-sz are given, respectively.
AH_TEMPLATE([ENABLE_CHUNKING], [Define if to enable chunked storage layout and chunking feature])
AH_TEMPLATE([ENABLE_ZLIB], [Define if to enable zlib chunking method])
AH_TEMPLATE([ENABLE_SZ], [Define if to enable sz chunking method])
AH_TEMPLATE([ENABLE_ADIOS], [Define if to enable ADIOS BP read feature])
AH_TEMPLATE([HDF5_VER_GE_1_10_4], [Define if HDF5 version is at least 1.10.4])
AH_TEMPLATE([NETCDF_GE_4_5_0], [Define if NetCDF version is at least 4.5.0])
Expand Down Expand Up @@ -2506,6 +2509,135 @@ fi
AC_SUBST(ENABLE_BURST_BUFFER)
AM_CONDITIONAL(ENABLE_BURST_BUFFER, [test x$enable_bbdriver = xyes])

dnl Configure-time switch for the chunking driver. The feature stays off
dnl unless the user passes --enable-chunking.
AC_ARG_ENABLE([chunking],
   [AS_HELP_STRING([--enable-chunking],
                   [Enable chunked chunking driver support. @<:@default: disabled@:>@])],
   [enable_chunking=${enableval}],
   [enable_chunking=no])

dnl ENABLE_CHUNKING is substituted as 1 or 0. The C macro of the same name is
dnl defined only when the option value is exactly yes.
AS_IF([test "x$enable_chunking" = "xyes"],
      [AC_DEFINE(ENABLE_CHUNKING)
       ENABLE_CHUNKING=1],
      [ENABLE_CHUNKING=0])
AC_SUBST(ENABLE_CHUNKING)
AM_CONDITIONAL(ENABLE_CHUNKING, [test x$enable_chunking = xyes])

dnl Optional zlib (deflate) filter for chunked variables. The filter stays off
dnl unless the user passes --enable-zlib.
AC_ARG_ENABLE([zlib],
   [AS_HELP_STRING([--enable-zlib],
                   [Enable zlib chunking method support. @<:@default: disabled@:>@])],
   [enable_zlib=${enableval}], [enable_zlib=no]
)

dnl ENABLE_ZLIB is substituted as 1 or 0. The C macro and the automake
dnl conditional of the same name are set only when the option value is yes.
ENABLE_ZLIB=0
if test "x$enable_zlib" = "xyes" ; then
   AC_DEFINE(ENABLE_ZLIB)
   ENABLE_ZLIB=1
fi
AC_SUBST(ENABLE_ZLIB)
AM_CONDITIONAL(ENABLE_ZLIB, [test x$enable_zlib = xyes])

if test "x$enable_zlib" = "xyes" ; then
   dnl --with-zlib=DIR names the zlib installation prefix. A bare --with-zlib
   dnl carries no path and is rejected.
   ZLIB_INSTALL=""
   AC_ARG_WITH(zlib,
      [AS_HELP_STRING([--with-zlib=/path/to/implementation],
                      [installation prefix for zlib implementation])],
      [if test "x${withval}" = xyes; then
          AC_MSG_ERROR([--with-zlib is set but the value is NULL])
       else
          ZLIB_INSTALL=${withval}
       fi]
   )

   dnl Flags are appended with plain assignments. The += operator is a bash
   dnl extension and configure may be run by a POSIX shell that lacks it.
   if test "x${ZLIB_INSTALL}" != x ; then
      CPPFLAGS="${CPPFLAGS} -I${ZLIB_INSTALL}/include"
      LDFLAGS="${LDFLAGS} -L${ZLIB_INSTALL}/lib"
      LIBS="${LIBS} -lz"
   fi

   LIBS="${LIBS} -lm -ldl"

   dnl AC_SEARCH_LIBS and AC_CHECK_HEADERS print their own checking lines, so
   dnl no separate AC_MSG_CHECKING is issued. An unmatched AC_MSG_CHECKING
   dnl would leave a dangling message in the configure output.
   have_zlib=no
   AC_SEARCH_LIBS([deflate], [z], [have_zlib=yes], [have_zlib=no])
   if test "x${have_zlib}" = xyes; then
      AC_CHECK_HEADERS([zlib.h], [], [have_zlib=no])
   fi

   dnl Both the library and the header are mandatory once the filter is requested.
   if test "x${have_zlib}" = xno; then
      AC_MSG_ERROR([
      ------------------------------------------------------------
      The ZLIB library and header file are required to build
      PnetCDF with ZLIB chunking support. Use option
      --with-zlib=/path/to/implementation
      to specify the location of ZLIB build.
      Stopping ...
      Check 'config.log' for more information.
      ------------------------------------------------------------])
   fi
fi

dnl Configure-time switch for the sz filter. The filter stays off unless the
dnl user passes --enable-sz.
AC_ARG_ENABLE([sz],
   [AS_HELP_STRING([--enable-sz],
                   [Enable sz chunking method support. @<:@default: disabled@:>@])],
   [enable_sz=${enableval}],
   [enable_sz=no])

dnl ENABLE_SZ is substituted as 1 or 0. The C macro of the same name is
dnl defined only when the option value is exactly yes.
AS_IF([test "x$enable_sz" = "xyes"],
      [AC_DEFINE(ENABLE_SZ)
       ENABLE_SZ=1],
      [ENABLE_SZ=0])
AC_SUBST(ENABLE_SZ)
AM_CONDITIONAL(ENABLE_SZ, [test x$enable_sz = xyes])

dnl Compression is available when zlib was found or the sz filter was
dnl requested. has_compression drives the automake conditional below.
has_compression=0
if test "x${have_zlib}" = xyes || test "x$enable_sz" = "xyes" ; then
   has_compression=1
   dnl AC_DEFINE performs no shell expansion on its value, so passing a shell
   dnl variable would write the variable name itself into config.h. Define
   dnl the macro to a literal 1 and only when compression is enabled. This
   dnl works for both ifdef style and if style preprocessor tests.
   AC_DEFINE(ENABLE_COMPRESSION, [1], [Defined if compression is enabled])
fi
AM_CONDITIONAL(ENABLE_COMPRESSION, [test x$has_compression = x1])

if test "x$enable_sz" = "xyes" ; then
   dnl --with-sz=DIR names the SZ installation prefix. A bare --with-sz
   dnl carries no path and is rejected.
   SZ_INSTALL=""
   AC_ARG_WITH(sz,
      [AS_HELP_STRING([--with-sz=/path/to/implementation],
                      [installation prefix for sz implementation])],
      [if test "x${withval}" = xyes; then
          AC_MSG_ERROR([--with-sz is set but the value is NULL])
       else
          SZ_INSTALL=${withval}
       fi]
   )

   dnl Flags are appended with plain assignments. The += operator is a bash
   dnl extension and configure may be run by a POSIX shell that lacks it.
   if test "x${SZ_INSTALL}" != x ; then
      CPPFLAGS="${CPPFLAGS} -I${SZ_INSTALL}/include"
      LDFLAGS="${LDFLAGS} -L${SZ_INSTALL}/lib"
      LIBS="${LIBS} -lSZ -lzstd"
   fi

   LIBS="${LIBS} -lm -ldl"

   dnl Probe the SZ library itself. The previous check searched for deflate
   dnl in libz, which says nothing about whether SZ is installed.
   dnl NOTE(review): SZ_Init is the initialization entry point declared in
   dnl sz.h of SZ 2.x. Confirm it matches the SZ version the driver targets.
   dnl AC_SEARCH_LIBS and AC_CHECK_HEADERS print their own checking lines, so
   dnl no separate AC_MSG_CHECKING is issued.
   have_sz=no
   AC_SEARCH_LIBS([SZ_Init], [SZ], [have_sz=yes], [have_sz=no])
   if test "x${have_sz}" = xyes; then
      AC_CHECK_HEADERS([sz.h], [], [have_sz=no])
   fi

   dnl Both the library and the header are mandatory once the filter is requested.
   if test "x${have_sz}" = xno; then
      AC_MSG_ERROR([
      ------------------------------------------------------------
      The SZ library and header file are required to build
      PnetCDF with SZ chunking support. Use option
      --with-sz=/path/to/implementation
      to specify the location of SZ build.
      Stopping ...
      Check 'config.log' for more information.
      ------------------------------------------------------------])
   fi
fi

ADIOS_INSTALL=""
AC_ARG_WITH(adios,
[AS_HELP_STRING([--with-adios@<:@=DIR@:>@],
Expand Down Expand Up @@ -2827,6 +2959,7 @@ AC_CONFIG_FILES(Makefile \
src/drivers/nc4io/Makefile \
src/drivers/ncadios/Makefile \
src/drivers/ncbbio/Makefile \
src/drivers/ncchunkio/Makefile \
src/drivers/ncfoo/Makefile \
src/binding/Makefile \
src/binding/cxx/Makefile \
Expand Down
119 changes: 119 additions & 0 deletions doc/README.Chunk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Support variable chunking and compression

PnetCDF contains an experimental variable chunking and compression feature
for classic NetCDF files.

For details about its design and implementation, please refer to:
Hou, Kaiyuan, et al. "Supporting Data Compression in PnetCDF."
2021 IEEE International Conference on Big Data (Big Data). IEEE, 2021.

## Enable variable chunking support

* To build PnetCDF with variable chunking support
+ Add `--enable-chunking` option at the configure command line. For example,
```
./configure --prefix=/PnetCDF/install/path --enable-chunking
```
* To build deflate filter support for chunked variable
+ Add `--enable-zlib` option at the configure command line. Option
`--with-zlib` can also be used to specify the installation path of
zlib if it is not in the standard locations. For example,
```
./configure --prefix=/PnetCDF/install/path --enable-chunking --enable-zlib \
--with-zlib=/zlib/install/path
```
* To build sz filter support for chunked variable
+ Add `--enable-sz` option at the configure command line. Option
`--with-sz` can also be used to specify the installation path of
sz if it is not in the standard locations. For example,
```
./configure --prefix=/PnetCDF/install/path --enable-chunking --enable-sz \
--with-sz=/sz/install/path
```

## Enable variable chunking

To enable chunked storage layout for variables, set the file info "nc_chunking"
to "enable". The chunking feature requires the 64-bit data NetCDF format
(CDF-5). For example,
```
MPI_Info_create(&info);
MPI_Info_set(info, "nc_chunking", "enable");
ncmpi_create(MPI_COMM_WORLD, fname, NC_64BIT_DATA, info, &ncid);
```
Alternatively, the file info can be set through the environment variable
"PNETCDF_HINTS".
```
export PNETCDF_HINTS="nc_chunking=enable"
```
When chunking is enabled, all non-scalar variables will be stored in a chunked
storage layout. Scalar variables are not chunked.

Users can also set the default filter for chunked variables. For example,
```
MPI_Info_set(info, "nc_chunk_default_filter", "zlib");
```
or
```
export PNETCDF_HINTS="nc_chunking=enable;nc_chunk_default_filter=zlib"
```
The available filter options are none (default), zlib (deflate), and sz.

## Define chunk dimension of variables

Applications can use the following APIs to set and get the chunk dimension of
a variable.
```
int ncmpi_var_set_chunk (int ncid, int varid, int *chunk_dim);
int ncmpi_var_get_chunk (int ncid, int varid, int *chunk_dim);
```
For example:
```
int dimids[2];
int chunk_dim[2] = {10, 10};
ncmpi_def_dim (ncid, "Y", 100, &dimids[0]);
ncmpi_def_dim (ncid, "X", 100, &dimids[1]);
ncmpi_def_var (ncid, name, type, 2, dimids, &varid);
ncmpi_var_set_chunk (ncid, varid, chunk_dim);
```
For record variables, the chunk dimension along the record dimension is always
1.
If no chunk dimension is set, it defaults to the dimension of the variable,
except along the record dimension. That is, by default PnetCDF creates one
chunk per record for record variables and one chunk per variable for
fixed-size variables.

## Define filter for chunked variables

Applications can use the following APIs to set and get the filter of a
variable.
```
#define NC_FILTER_NONE 0
#define NC_FILTER_DEFLATE 2
#define NC_FILTER_SZ 3
int ncmpi_var_set_filter (int ncid, int varid, int filter);
int ncmpi_var_get_filter (int ncid, int varid, int *filter);
```
For example:
```
ncmpi_var_set_filter (ncid, varid, NC_FILTER_DEFLATE);
```
Valid filter values are NC_FILTER_NONE (none), NC_FILTER_DEFLATE (zlib), and
NC_FILTER_SZ (sz).


## Known problems

There are some limitations of the experimental variable chunking feature.

* Only one filter can be applied to a chunked variable. Unlike HDF5 which allows
the stacking of multiple filters on chunked datasets, the current
implementation in PnetCDF only allows a single filter to be applied to a
variable.
* No per-variable option for variable chunking. If chunking is enabled, all
non-scalar variables will be chunked even if the chunk dimension is not
defined.
* Independent variable I/O is not supported. Variable read/write (get/put)
must be collective in order to maintain data consistency of filtered chunks.
Non-blocking APIs can be used to mitigate the impact of this limitation.

Copyright (C) 2022, Northwestern University and Argonne National Laboratory

See the COPYRIGHT notice in the top-level directory.

7 changes: 6 additions & 1 deletion examples/C/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ check_PROGRAMS = collective_write \
time_var \
create_from_cdl

# The chunking/compression example programs are added to check_PROGRAMS only
# when the ENABLE_COMPRESSION automake conditional is true.
if ENABLE_COMPRESSION
check_PROGRAMS += chunk_compress chunk_io chunk_2D
endif

if INSTALL_EXAMPLES
example_execbin_PROGRAMS = $(check_PROGRAMS)
example_execbindir = $(exec_prefix)/pnetcdf_examples/C
Expand Down Expand Up @@ -84,7 +88,8 @@ NC_FILES = $(check_PROGRAMS:%=$(TESTOUTDIR)/%.nc) \
CLEANFILES = core core.* *.gcda *.gcno *.gcov gmon.out \
$(NC_FILES) $(TESTOUTDIR)/pthread.nc.* $(TESTOUTDIR)/testfile.nc

EXTRA_DIST = run_c_examples.sh cdl_header.txt
EXTRA_DIST = run_c_examples.sh cdl_header.txt \
parallel_run.sh chunk_compress_FLDS.c

ptest ptest4: $(check_PROGRAMS)
@echo "==========================================================="
Expand Down
Loading