From 35e56538167fbfabf32c5c595f6f187ec6c6461e Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 15:11:37 +1100 Subject: [PATCH 1/9] Add DOI support and update metadata Expose dataset DOIs in the catalog and update repository metadata. DataCatalog now resolves a "doi" field (default None) and includes it in dataset records/DataFrame for both subdatasets and versioned datasets. datasets.yaml was updated to include placeholder DOIs (10.000/ABCD) for many entries. .zenodo.json creators, title, description and keywords were also updated to reflect the project and contributor changes. --- .zenodo.json | 18 ++++++++++++------ src/ccdtools/catalog.py | 7 ++++++- src/ccdtools/config/datasets.yaml | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index b132eb2..a9b49f4 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,22 +1,28 @@ { "creators": [ { - "orcid": "0000-0003-3891-5444", + "orcid": "0000-0001-6541-2768", "affiliation": "ACCESS-NRI", - "name": "Beucher, Romain" + "name": "Bird, Lawrence" }, { + "orcid": "0000-0002-5238-6115", "affiliation": "ACCESS-NRI", - "name": "Carouge, Claire" + "name": "Kin Jun Hew, Justin" + }, + { + "orcid": "0000-0002-2320-4239", + "affiliation": "ACCESS-NRI", + "name": "Tetley, Michael G." 
} ], "license": "Apache-2.0", - "title": "ACCESS-NRI Template", - "decription": "Template repository for ACCESS-NRI, test", + "title": "ACCESS-NRI ccdtools", + "description": "Python API for the ACCESS-NRI Cryosphere Community Datapool (CCD)", - "keywords": ["Climate", "Science", "ACCESS-NRI", "NCI"], + "keywords": ["Cryosphere", "Ice sheet modelling", "Climate", "Science", "ACCESS-NRI", "NCI"], "communities": [ {"identifier": "access-nri"} diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index fcf0852..aa5c8b6 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -315,7 +315,8 @@ def _list_datasets(self): DataFrame with columns: ``dataset``, ``display_name``, ``description``, ``tags``, ``version``, ``subdataset``, ``path``, ``full_path``, ``extension``, ``skip_lines``, ``no_data_value``, ``ignore_dirs``, - ``ignore_files``, ``loader``, ``resolutions``, ``static_patterns``. + ``ignore_files``, ``loader``, ``resolutions``, ``static_patterns``, + ``doi``. """ # Initialize list to hold dataset records @@ -356,6 +357,7 @@ def _list_datasets(self): loader = self._resolve_metadata(meta, subds_meta, version, "loader", "default") resolutions = self._resolve_metadata(meta, subds_meta, version, "resolutions") static_patterns = self._resolve_metadata(meta, subds_meta, version, "static_patterns", []) + doi = self._resolve_metadata(meta, subds_meta, version, "doi", None) # Normalise lists as needed ignore_dirs = self._normalise_list(ignore_dirs) @@ -389,6 +391,7 @@ def _list_datasets(self): "loader": loader, "resolutions": resolutions, "static_patterns": static_patterns, + "doi": doi, }) # VERSIONED DATASETS (no subdatasets) @@ -406,6 +409,7 @@ def _list_datasets(self): loader = self._resolve_metadata(meta, None, version, "loader", "default") resolutions = self._resolve_metadata(meta, None, version, "resolutions") static_patterns = self._resolve_metadata(meta, None, version, "static_patterns", []) + doi = self._resolve_metadata(meta, None, version, 
"doi", None) # Normalise lists as needed ignore_dirs = self._normalise_list(ignore_dirs) @@ -437,6 +441,7 @@ def _list_datasets(self): "loader": loader, "resolutions": resolutions, "static_patterns": static_patterns, + "doi": doi, }) return pd.DataFrame(records) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index bdce682..3be2d86 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -2,6 +2,7 @@ datasets: # ELEVATION AND GEOMETRY DATASETS measures_bedmachine_antarctica: + doi: 10.000/ABCD display_name: BedMachine Antarctica path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/measures_bedmachine_antarctica description: High-resolution bed topography and ice thickness data for Antarctica. @@ -15,6 +16,7 @@ datasets: extension: nc bedmap: + doi: 10.000/ABCD display_name: Bedmap path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/bedmap description: Gridded, geospatial, and point datasets of Antarctic ice thickness, surface elevation, and bed elevation. @@ -73,6 +75,7 @@ datasets: - bm3_streamlines_pt measures_its_live_antarctica_annual_ice_mask: + doi: 10.000/ABCD display_name: ITS_LIVE Annual 240 m Ice Sheet Extent Mask - 1997 to 2021 path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/measures_its_live_antarctic_annual_240m_ice_sheet_extent_masks_1997_2021 description: Annual ice sheet extent masks for Antarctica from ITS_LIVE at 240 m resolution, covering the years 1997 to 2021. Ice thickness and velocity data used to determine the presence of ice are also provided. 
@@ -84,6 +87,7 @@ datasets: extension: nc measures_its_live_antarctic_grounded_ice_sheet_elevation_change: + doi: 10.000/ABCD display_name: ITS_LIVE Antarctic Ice Sheet grounded ice elevation change path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/measures_its_live_antarctic_grounded_ice_sheet_elevation_change description: ITS_LIVE Antarctic Ice Sheet grounded ice elevation change @@ -94,6 +98,7 @@ datasets: extension: nc measures_its_live_antarctic_quarterly_ice_shelf_height_change: + doi: 10.000/ABCD display_name: ITS_LIVE Antarctic Ice Shelf quarterly height change and basal melt rates - 1992 to 2017 path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/measures_its_live_antarctic_quarterly_1920m_ice_shelf_height_change_and_basal_melt_rates_1992_2017 description: ITS_LIVE Antarctic Ice Shelf quarterly height change and basal melt rates - 1992 to 2017 @@ -108,6 +113,7 @@ datasets: # GEOSPTATIAL DATASETS measures_antarctic_boundaries: + doi: 10.000/ABCD display_name: Antarctic Boundaries for IPY 2007-2009 from Satellite Radar path: /g/data/av17/access-nri/cryosphere-data-pool/geospatial/measures_antarctic_boundaries_for_ipy_2007_2009_from_satellite_radar description: Antarctic coastline, grounding line, and ice front positions derived from satellite radar data for the International Polar Year 2007-2009. @@ -149,6 +155,7 @@ datasets: extension: tif measures_antarctic_grounding_line: + doi: 10.000/ABCD display_name: Antarctic Grounding Line from Differential Satellite Radar Interferometry path: /g/data/av17/access-nri/cryosphere-data-pool/geospatial/measures_antarctic_grounding_line_from_differential_satellite_radar_interferometry description: Grounding line positions for Antarctica derived from differential satellite radar interferometry. 
@@ -161,6 +168,7 @@ datasets: # BASAL FORCING DATASETS antarctic_geothermal_heat_flow_model_aq1: + doi: 10.000/ABCD display_name: Antarctic Geothermal Heat Flow Model AQ1 path: /g/data/av17/access-nri/cryosphere-data-pool/basal_forcing/antarctic_geothermal_heat_flow_model_aq1 description: Geothermal heat flow model for Antarctica (AQ1) providing estimates of basal heat flux beneath the Antarctic ice sheet. @@ -178,6 +186,7 @@ datasets: # ICE VELOCITY DATASETS measures_annual_antarctic_ice_velocity_maps: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_annual_antarctic_ice_velocity_maps display_name: MEaSUREs Annual Antarctic Ice Velocity Maps description: Annual ice velocity maps for Antarctica from the MEaSUREs program, providing ice flow velocities at various resolutions. @@ -188,6 +197,7 @@ datasets: loader: measures_velocity measures_insar_based_ice_velocity_maps_of_central_antarctica: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_insar_based_ice_velocity_maps_of_central_antarctica display_name: InSAR-based Ice Velocity Maps of Central Antarctica description: Ice velocity maps for central Antarctica derived from Interferometric Synthetic Aperture Radar (InSAR) data. @@ -199,6 +209,7 @@ datasets: loader: measures_velocity measures_insar_based_antarctica_ice_velocity_map: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_insar_based_antarctica_ice_velocity_map display_name: InSAR-based Antarctica Ice Velocity Map description: High-resolution ice velocity map for Antarctica derived from Interferometric Synthetic Aperture Radar (InSAR) data. 
@@ -209,6 +220,7 @@ datasets: extension: nc measures_insar_based_ice_velocity_of_the_amundsen_sea_embayment: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_insar_based_ice_velocity_of_the_amundsen_sea_embayment display_name: InSAR-based Ice Velocity of the Amundsen Sea Embayment description: Ice velocity data for the Amundsen Sea Embayment region of Antarctica derived from Interferometric Synthetic Aperture Radar (InSAR) measurements. @@ -219,6 +231,7 @@ datasets: extension: nc measures_phase_based_antarctica_ice_velocity_map: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_phase_based_antarctica_ice_velocity_map display_name: Phase-based Antarctica Ice Velocity Map description: High-resolution ice velocity map for Antarctica derived from phase-based measurements. @@ -228,6 +241,7 @@ datasets: extension: nc measures_its_live_regional_glacier_and_ice_sheet_surface_velocities: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/ice_velocity/measures_its_live_regional_glacier_and_ice_sheet_surface_velocities display_name: ITS_LIVE Regional Glacier and Ice Sheet Surface Velocities description: ITS_LIVE regional glacier and ice sheet surface velocities for Antarctica and Greenland. @@ -321,6 +335,7 @@ datasets: # SURFACE FORCING DATASETS racmo2.3p2_monthly_27km_1979-2022: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/surface_forcing/racmo2.3p2_monthly_27km_1979-2022 display_name: RACMO2.3p2 Monthly Surface Mass Balance and Climate Data at 27 km Resolution (1979-2022) description: Regional Atmospheric Climate Model (RACMO) version 2.3p2 monthly surface mass balance and climate data for Antarctica at 27 km resolution, covering the years 1979 to 2022. 
@@ -330,6 +345,7 @@ datasets: - climate data - racmo extension: nc + loader: racmo ignore_files: # Ignore static mask files that do not integrate with variable files well @@ -338,6 +354,7 @@ datasets: - TotIS_RACMO_ANT27_IMBIE2.nc racmo2.4p1_monthly_11km_1979-2023: + doi: 10.000/ABCD path: /g/data/av17/access-nri/cryosphere-data-pool/surface_forcing/racmo2.4p1_monthly_11km_1979-2023 display_name: RACMO2.4p1 Monthly Surface Mass Balance and Climate Data at 11 km Resolution (1979-2023) description: Regional Atmospheric Climate Model (RACMO) version 2.4p1 monthly surface mass balance and climate data for Antarctica at 11 km resolution, covering the years 1979 to 2023. From 8964cf43d0a6b747e444e086fe000c4c4b36a912 Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 17:17:10 +1100 Subject: [PATCH 2/9] Add subdataset DOIs for BedMachine Antarctica Remove the top-level DOI for measures_bedmachine_antarctica and add a subdatasets block with individual DOIs for versions v1, v2, and v3. This updates dataset metadata to track DOIs per dataset version while leaving paths and other attributes unchanged. --- src/ccdtools/config/datasets.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index 3be2d86..28d7e0f 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -2,7 +2,6 @@ datasets: # ELEVATION AND GEOMETRY DATASETS measures_bedmachine_antarctica: - doi: 10.000/ABCD display_name: BedMachine Antarctica path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/measures_bedmachine_antarctica description: High-resolution bed topography and ice thickness data for Antarctica. 
@@ -14,6 +13,13 @@ datasets: - bed uncertainty - ice mask extension: nc + subdatasets: + v1: + doi: 10.5067/C2GFER6PTOS4 + v2: + doi: 10.5067/E1QL9HFQ7A8M + v3: + doi: 10.5067/FPSU0V1MWUB6 bedmap: doi: 10.000/ABCD From e5f1af6bccae7dba0e0a1c94e6f72d8ce2070f83 Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 17:22:05 +1100 Subject: [PATCH 3/9] Add subpath and extension for subdatasets Update src/ccdtools/config/datasets.yaml to add 'subpath' and 'extension' entries for subdatasets v1, v2, and v3 (subpath set to the version name and extension set to 'nc') to make dataset locations and file types explicit. --- src/ccdtools/config/datasets.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index 28d7e0f..3d732fc 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -16,10 +16,16 @@ datasets: subdatasets: v1: doi: 10.5067/C2GFER6PTOS4 + subpath: v1 + extension: nc v2: doi: 10.5067/E1QL9HFQ7A8M + subpath: v2 + extension: nc v3: doi: 10.5067/FPSU0V1MWUB6 + subpath: v3 + extension: nc bedmap: doi: 10.000/ABCD From 8e66bc89f80e9984ad980917e94b8986037de49e Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 17:25:50 +1100 Subject: [PATCH 4/9] Nest subdataset metadata under data key Wrap doi, subpath and extension for subdatasets v1, v2, and v3 under a new `data` mapping in src/ccdtools/config/datasets.yaml to standardize the subdataset schema and make room for additional metadata fields. 
--- src/ccdtools/config/datasets.yaml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index 3d732fc..6f9db6d 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -15,17 +15,20 @@ datasets: extension: nc subdatasets: v1: - doi: 10.5067/C2GFER6PTOS4 - subpath: v1 - extension: nc + data: + doi: 10.5067/C2GFER6PTOS4 + subpath: v1 + extension: nc v2: - doi: 10.5067/E1QL9HFQ7A8M - subpath: v2 - extension: nc + data: + doi: 10.5067/E1QL9HFQ7A8M + subpath: v2 + extension: nc v3: - doi: 10.5067/FPSU0V1MWUB6 - subpath: v3 - extension: nc + data: + doi: 10.5067/FPSU0V1MWUB6 + subpath: v3 + extension: nc bedmap: doi: 10.000/ABCD From 1f3aaffa6be24ab494af3f0c3f336dc3a112a0ba Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 17:32:47 +1100 Subject: [PATCH 5/9] Correct subdataset keys in datasets.yaml Adjust datasets.yaml structure: remove the duplicate top-level `extension: nc` and replace the nested `data:` keys under each subdataset with explicit version keys (`v1`, `v2`, `v3`). This aligns the file with the expected config schema so each subdataset's metadata (doi, subpath, extension) is nested under its version key and can be parsed correctly. 
--- src/ccdtools/config/datasets.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index 6f9db6d..a2a5b66 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -12,20 +12,19 @@ datasets: - surface elevation - bed uncertainty - ice mask - extension: nc subdatasets: v1: - data: + v1: doi: 10.5067/C2GFER6PTOS4 subpath: v1 extension: nc v2: - data: + v2: doi: 10.5067/E1QL9HFQ7A8M subpath: v2 extension: nc v3: - data: + v3: doi: 10.5067/FPSU0V1MWUB6 subpath: v3 extension: nc From 5ea294ca49978f91efc15318faad19cbe3b5babf Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Wed, 18 Mar 2026 17:45:02 +1100 Subject: [PATCH 6/9] Consolidate dataset DOIs and extension Remove deeply nested subdatasets mapping and replace with a flatter structure: a single 'extension' field and a 'doi' mapping keyed by version (v1, v2, v3). This simplifies the datasets config, avoids repeating the extension for each subdataset, and centralizes DOIs for easier maintenance. No changes to the bedmap entry. 
--- src/ccdtools/config/datasets.yaml | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index a2a5b66..fa83b5f 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -12,22 +12,11 @@ datasets: - surface elevation - bed uncertainty - ice mask - subdatasets: - v1: - v1: - doi: 10.5067/C2GFER6PTOS4 - subpath: v1 - extension: nc - v2: - v2: - doi: 10.5067/E1QL9HFQ7A8M - subpath: v2 - extension: nc - v3: - v3: - doi: 10.5067/FPSU0V1MWUB6 - subpath: v3 - extension: nc + extension: nc + doi: + v1: 10.5067/C2GFER6PTOS4 + v2: 10.5067/E1QL9HFQ7A8M + v3: 10.5067/FPSU0V1MWUB6 bedmap: doi: 10.000/ABCD From 09d52888640950893fb3155f8721b4307799e723 Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Thu, 19 Mar 2026 08:06:57 +1100 Subject: [PATCH 7/9] Testing adding doi to subdatasets Move the Bedmap DOI from the dataset level to its subdatasets: remove the placeholder top-level DOI (10.000/ABCD) and add individual DOIs to the geospatial, points, and gridded subdatasets. --- src/ccdtools/config/datasets.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ccdtools/config/datasets.yaml b/src/ccdtools/config/datasets.yaml index fa83b5f..41949ef 100644 --- a/src/ccdtools/config/datasets.yaml +++ b/src/ccdtools/config/datasets.yaml @@ -19,7 +19,6 @@ datasets: v3: 10.5067/FPSU0V1MWUB6 bedmap: - doi: 10.000/ABCD display_name: Bedmap path: /g/data/av17/access-nri/cryosphere-data-pool/elevation_geometry/bedmap description: Gridded, geospatial, and point datasets of Antarctic ice thickness, surface elevation, and bed elevation. 
@@ -34,14 +33,17 @@ datasets: subpath: geospatial_data extension: gpkg no_data_value: -9999 + doi: 10.5285/925ac4ec-2a9d-461a-bfaa-6314eb0888c8 points: subpath: point_data extension: csv skip_lines: 18 no_data_value: -9999 + doi: 10.5285/f64815ec-4077-4432-9f55-0ce230f46029 gridded: subpath: gridded_data extension: tif + doi: 10.5285/908BB17F-467C-42BF-AE00-F03BB0FEEA23 v2: geospatial: subpath: geospatial_data From 1dbc182b70e81eeb14c9a721e59b81c5cce509b8 Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Thu, 19 Mar 2026 08:18:03 +1100 Subject: [PATCH 8/9] Print DOI in DataCatalog output Add DOI to the metadata printout in DataCatalog by inserting print(f'\nDOI: {doi}') after the version line. This ensures the catalog display includes the DOI for easier reference. --- src/ccdtools/catalog.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index aa5c8b6..44080b4 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -1081,6 +1081,9 @@ def help(self, dataset = None, version = None): ) print(f"\nVersion: {version}") + print(f"\nDOI: {doi}") + + # 4. Subdatasets # ------------------------------------------------------------------ From 46c2c087b48f0d43790b424520697eb9b66ab9c0 Mon Sep 17 00:00:00 2001 From: Mike Tetley Date: Thu, 19 Mar 2026 08:32:02 +1100 Subject: [PATCH 9/9] Print DOI from subset row; remove duplicate row Extract the first row from `subset` earlier to obtain and print the DOI, and remove the later redundant `row = subset.iloc[0]`. This ensures `doi` is read from the row before printing and avoids duplicating the row extraction when accessing metadata. 
--- src/ccdtools/catalog.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index 44080b4..270b9a8 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -1081,7 +1081,11 @@ def help(self, dataset = None, version = None): ) print(f"\nVersion: {version}") - print(f"\nDOI: {doi}") + + # Extract row for metadata access + row = subset.iloc[0] + doi = row.get("doi", None) + print(f"DOI: {doi}") @@ -1098,7 +1102,6 @@ def help(self, dataset = None, version = None): # 5. Capabilities (based on row metadata) # ------------------------------------------------------------------ - row = subset.iloc[0] print("\nSupported catalog keywords:") print(f" - subdataset : {'yes' if not subset.subdataset.isna().all() else 'no'}")