From e75c60ea1a1537454a815f49f7eb9ccc22d9e57b Mon Sep 17 00:00:00 2001 From: Justin Kin Jun Hew Date: Fri, 20 Mar 2026 13:53:27 +1100 Subject: [PATCH 1/3] add av17 checker --- src/ccdtools/catalog.py | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index c26fcf9..54551bc 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -3,12 +3,49 @@ from importlib import resources import pandas as pd import warnings +import os +import socket from . import loaders # Ensure UserWarnings are always shown warnings.simplefilter('always', UserWarning) +# NCI Gadi and av17 project access utilities +def _is_on_gadi(): + """ + Check if running on NCI Gadi supercomputer. + + Detection is based on hostname pattern or NCI-specific environment variables. + + Returns + ------- + bool + True if running on Gadi, False otherwise. + """ + hostname = socket.gethostname() + # Check hostname patterns (login nodes, compute nodes) + if hostname.startswith('gadi') or hostname.startswith('nid'): + return True + # Check NCI-specific environment variables + if os.environ.get('PBS_JOBFS') is not None: + return True + if os.environ.get('PROJECT') is not None and os.path.exists('/g/data'): + return True + return False + +def _check_av17_access(): + """ + Check if /g/data/av17 is accessible (user has joined av17 project). + + Returns + ------- + bool + True if av17 is accessible, False otherwise. + """ + av17_path = Path('/g/data/av17') + return av17_path.exists() and os.access(av17_path, os.R_OK) + class DataCatalog: """ A catalog for managing and loading datasets with versioning and subdataset support. @@ -85,6 +122,15 @@ def __init__(self, yaml_path = None): self.config = self._load_yaml(self.config_file) self.datasets = self._list_datasets() self._df_summary = self.datasets + + # Check av17 access on Gadi + if _is_on_gadi() and not _check_av17_access(): + warnings.warn( + "Running on Gadi but /g/data/av17 is not accessible. " + "You need to join NCI project 'av17' to access CCD datasets.\n" + "Apply at: https://my.nci.org.au/mancini/project/av17", + UserWarning + ) def _repr_html_(self): """ @@ -472,6 +518,17 @@ def _recursive_find_files(self, root, extension, ignore_dirs = None, ignore_file # Convert root to Path object root = Path(root) + # Check if root exists; provide helpful error if av17 is inaccessible + if not root.exists(): + if '/g/data/av17' in str(root) and _is_on_gadi(): + raise FileNotFoundError( + f"Path not accessible: {root}\n" + "You need to join NCI project 'av17' to access CCD datasets.\n" + "Apply at: https://my.nci.org.au/mancini/project/av17" + ) + else: + raise FileNotFoundError(f"Path not found: {root}") + # Ensure provided extension does not start with dot ext = extension.lstrip(".") @@ -705,6 +762,13 @@ def load_dataset(self, dataset, version = None, subdataset = None, **kwargs): # Load dataset from the single matching row row = subset.iloc[0] + + # Check av17 access before loading if on Gadi + if _is_on_gadi() and not _check_av17_access(): + raise PermissionError( + "Cannot access /g/data/av17. You need to join NCI project 'av17' " + "to access CCD datasets.\nApply at: https://my.nci.org.au/mancini/project/av17" + ) # Check any additional keywords against the row self._check_keywords(row, kwargs) From d92e9fbf901ac25336c12b416a78e6f50225e018 Mon Sep 17 00:00:00 2001 From: Justin Kin Jun Hew Date: Fri, 20 Mar 2026 13:57:01 +1100 Subject: [PATCH 2/3] are session --- src/ccdtools/catalog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index 54551bc..f2d4d94 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -128,6 +128,7 @@ def __init__(self, yaml_path = None): warnings.warn( "Running on Gadi but /g/data/av17 is not accessible. " "You need to join NCI project 'av17' to access CCD datasets.\n" + "If running on ARE, you'll need to include the av17 project in your allocation.\n" "Apply at: https://my.nci.org.au/mancini/project/av17", UserWarning ) From e0ff3500ff76cfbf5d7cce42ac032eeea100f460 Mon Sep 17 00:00:00 2001 From: Justin Kin Jun Hew Date: Fri, 20 Mar 2026 14:17:57 +1100 Subject: [PATCH 3/3] trigger earlier --- src/ccdtools/catalog.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ccdtools/catalog.py b/src/ccdtools/catalog.py index f2d4d94..305a1f6 100644 --- a/src/ccdtools/catalog.py +++ b/src/ccdtools/catalog.py @@ -119,19 +119,19 @@ def __init__(self, yaml_path = None): f"Provide a valid path, or omit the argument to use the " f"default packaged catalog." ) - self.config = self._load_yaml(self.config_file) - self.datasets = self._list_datasets() - self._df_summary = self.datasets - # Check av17 access on Gadi + # Check av17 access on Gadi BEFORE loading datasets if _is_on_gadi() and not _check_av17_access(): - warnings.warn( - "Running on Gadi but /g/data/av17 is not accessible. " + raise PermissionError( + "Running on Gadi but /g/data/av17 is not accessible.\n" "You need to join NCI project 'av17' to access CCD datasets.\n" "If running on ARE, you'll need to include the av17 project in your allocation.\n" - "Apply at: https://my.nci.org.au/mancini/project/av17", - UserWarning + "Apply at: https://my.nci.org.au/mancini/project/av17" ) + + self.config = self._load_yaml(self.config_file) + self.datasets = self._list_datasets() + self._df_summary = self.datasets def _repr_html_(self): """