Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions src/ccdtools/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,49 @@
from importlib import resources
import pandas as pd
import warnings
import os
import socket

from . import loaders

# Ensure UserWarnings are always shown.
# This installs a filter in the process-wide `warnings` filter list when this
# module is imported, so every UserWarning is reported each time it is issued
# instead of being suppressed after its first occurrence at a given location.
warnings.simplefilter('always', UserWarning)

# NCI Gadi and av17 project access utilities
def _is_on_gadi():
    """
    Report whether the current process appears to be running on NCI Gadi.

    Three signals are evaluated in order and the first match wins:

    1. the local hostname starts with ``gadi`` or ``nid``;
    2. the ``PBS_JOBFS`` environment variable is set;
    3. the ``PROJECT`` environment variable is set and ``/g/data`` exists.

    Returns
    -------
    bool
        True if any of the signals matches, False otherwise.
    """
    node_name = socket.gethostname()
    env = os.environ

    # NOTE(review): 'nid' looks like a generic node-id prefix that other
    # clusters may use as well -- confirm it cannot misidentify a non-Gadi
    # host before relying on this check to raise errors.
    return (
        node_name.startswith(('gadi', 'nid'))
        # Environment values are always strings, so a membership test is
        # equivalent to checking that .get() is not None.
        or 'PBS_JOBFS' in env
        or ('PROJECT' in env and os.path.exists('/g/data'))
    )

def _check_av17_access():
    """
    Check if /g/data/av17 is accessible (user has joined av17 project).

    The directory counts as accessible only when the current user may both
    list it and traverse into it.

    Returns
    -------
    bool
        True if av17 exists and is readable and searchable, False otherwise
        (including when the path is missing or cannot be reached).
    """
    # os.access() returns False for a missing or unreachable path, so no
    # separate existence test is needed. Path.exists() is avoided on purpose:
    # it propagates PermissionError when stat() is denied, which would turn
    # this boolean probe into a crash.
    # R_OK permits listing the directory; X_OK is required to open anything
    # inside it, so both are needed for the data to be usable.
    return os.access('/g/data/av17', os.R_OK | os.X_OK)

class DataCatalog:
"""
A catalog for managing and loading datasets with versioning and subdataset support.
Expand Down Expand Up @@ -82,6 +119,16 @@ def __init__(self, yaml_path = None):
f"Provide a valid path, or omit the argument to use the "
f"default packaged catalog."
)

# Check av17 access on Gadi BEFORE loading datasets
if _is_on_gadi() and not _check_av17_access():
raise PermissionError(
"Running on Gadi but /g/data/av17 is not accessible.\n"
"You need to join NCI project 'av17' to access CCD datasets.\n"
"If running on ARE, you'll need to include the av17 project in your allocation.\n"
"Apply at: https://my.nci.org.au/mancini/project/av17"
)

self.config = self._load_yaml(self.config_file)
self.datasets = self._list_datasets()
self._df_summary = self.datasets
Expand Down Expand Up @@ -472,6 +519,17 @@ def _recursive_find_files(self, root, extension, ignore_dirs = None, ignore_file
# Convert root to Path object
root = Path(root)

# Check if root exists; provide helpful error if av17 is inaccessible
if not root.exists():
if '/g/data/av17' in str(root) and _is_on_gadi():
raise FileNotFoundError(
f"Path not accessible: {root}\n"
"You need to join NCI project 'av17' to access CCD datasets.\n"
"Apply at: https://my.nci.org.au/mancini/project/av17"
)
else:
raise FileNotFoundError(f"Path not found: {root}")

# Ensure provided extension does not start with dot
ext = extension.lstrip(".")

Expand Down Expand Up @@ -705,6 +763,13 @@ def load_dataset(self, dataset, version = None, subdataset = None, **kwargs):

# Load dataset from the single matching row
row = subset.iloc[0]

# Check av17 access before loading if on Gadi
if _is_on_gadi() and not _check_av17_access():
raise PermissionError(
"Cannot access /g/data/av17. You need to join NCI project 'av17' "
"to access CCD datasets.\nApply at: https://my.nci.org.au/mancini/project/av17"
)

# Check any additional keywords against the row
self._check_keywords(row, kwargs)
Expand Down
Loading