Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastMONAI/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.1"
__version__ = "0.8.2"
13 changes: 7 additions & 6 deletions fastMONAI/dataset_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,11 @@ def round_to_divisor(val, div):
def preprocess_dataset(df, img_col, mask_col=None, output_dir='preprocessed',
target_spacing=None, apply_reorder=True, transforms=None,
max_workers=4, skip_existing=True):
"""Preprocess dataset to disk and update DataFrame path columns in-place.
"""Preprocess dataset to disk, creating new columns for preprocessed paths.

Processes images (and optionally masks) through a transform pipeline,
saves to output_dir, then updates df[img_col] and df[mask_col] in-place
to point to the preprocessed files.
saves to output_dir, then creates new '{col}_preprocessed' columns in
the DataFrame. Original columns are preserved unchanged.

Transform pipeline order:
CopyAffine (if masks) -> ToCanonical (if apply_reorder)
Expand Down Expand Up @@ -678,10 +678,11 @@ def _process_case(item):
failed_cases.append(Path(item['img_path']).name)
warnings.warn(f"Failed to process {item['img_path']}: {e}")

# Update DataFrame in-place
df[img_col] = [str(img_dir / Path(p).name) for p in df[img_col]]
# Create new columns for preprocessed paths (preserve originals)
df[f'{img_col}_preprocessed'] = [str(img_dir / Path(p).name) for p in df[img_col]]

if mask_col is not None:
df[mask_col] = [str(mask_dir / Path(p).name) for p in df[mask_col]]
df[f'{mask_col}_preprocessed'] = [str(mask_dir / Path(p).name) for p in df[mask_col]]

print(f"Preprocessing complete: {processed} processed, {skipped} skipped, {failed} failed")
if failed_cases:
Expand Down
1 change: 1 addition & 0 deletions fastMONAI/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ def _extract_patch_config(learn) -> dict:
'aggregation_mode': patch_config.aggregation_mode,
'padding_mode': patch_config.padding_mode,
'keep_largest_component': patch_config.keep_largest_component,
'preprocessed': patch_config.preprocessed,
}
else:
config['patch_config'] = None
Expand Down
28 changes: 19 additions & 9 deletions fastMONAI/vision_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class PatchConfig:
training and inference. Defaults to True (the common case).
target_spacing: Target voxel spacing [x, y, z] for resampling. Must match between
training and inference.
preprocessed: If True, data has been preprocessed externally (e.g., via
preprocess_dataset()). Training will skip reorder, resample, AND
pre_patch_tfms (e.g., normalization) since they were already applied.
Inference is unaffected and always applies pre_inference_tfms to raw
images. Defaults to False.
padding_mode: Padding mode for CropOrPad when image < patch_size. Default is 0 (zero padding)
to align with nnU-Net's approach. Can be int, float, or string (e.g., 'minimum', 'mean').
keep_largest_component: If True, keep only the largest connected component
Expand All @@ -142,6 +147,7 @@ class PatchConfig:
# Preprocessing parameters - must match between training and inference
apply_reorder: bool = True # Defaults to True (the common case)
target_spacing: list = None
preprocessed: bool = False # True = data already preprocessed, skip all preprocessing during training
padding_mode: int | float | str = 0 # Zero padding (nnU-Net standard)
# Post-processing (binary segmentation only)
keep_largest_component: bool = False
Expand Down Expand Up @@ -653,6 +659,8 @@ def from_df(
pre_patch_tfms: TorchIO transforms applied before patch extraction
(after reorder/resample). Example: [tio.ZNormalization()].
Accepts both fastMONAI wrappers and raw TorchIO transforms.
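Skipped during training when patch_config.preprocessed=True (pass them to
preprocess_dataset() via its transforms argument instead). The same transforms
must still be supplied as pre_inference_tfms at inference time, because
inference always runs on raw images.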
patch_tfms: TorchIO transforms applied to extracted patches (training only).
Mutually exclusive with gpu_augmentation.
gpu_augmentation: GpuPatchAugmentation instance for GPU-batched augmentation
Expand Down Expand Up @@ -725,17 +733,19 @@ def from_df(
# Build preprocessing transforms
all_pre_tfms = []

# Add reorder transform (reorder to RAS+ orientation)
if _apply_reorder:
all_pre_tfms.append(tio.ToCanonical())
# Skip all preprocessing if data was already preprocessed externally
if not patch_config.preprocessed:
# Add reorder transform (reorder to RAS+ orientation)
if _apply_reorder:
all_pre_tfms.append(tio.ToCanonical())

# Add resample transform
if _target_spacing is not None:
all_pre_tfms.append(tio.Resample(_target_spacing))
# Add resample transform
if _target_spacing is not None:
all_pre_tfms.append(tio.Resample(_target_spacing))

# Add user-provided transforms (normalize to raw TorchIO transforms)
if pre_patch_tfms:
all_pre_tfms.extend(normalize_patch_transforms(pre_patch_tfms))
# Add user-provided transforms (normalize to raw TorchIO transforms)
if pre_patch_tfms:
all_pre_tfms.extend(normalize_patch_transforms(pre_patch_tfms))

# Create subjects datasets with lazy loading (paths only, ~0 MB)
train_subjects = create_subjects_dataset(
Expand Down
2 changes: 1 addition & 1 deletion nbs/07_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@
"id": "czquspt567w",
"metadata": {},
"outputs": [],
"source": "#| export\ndef _detect_patch_workflow(dls) -> bool:\n \"\"\"Detect if DataLoaders are patch-based (MedPatchDataLoaders).\n \n Args:\n dls: DataLoaders instance\n \n Returns:\n True if dls is a MedPatchDataLoaders instance\n \"\"\"\n return hasattr(dls, 'patch_config') or hasattr(dls, '_patch_config')\n\n\ndef _extract_size_from_transforms(tfms) -> list | None:\n \"\"\"Extract target size from PadOrCrop transform if present.\n \n Args:\n tfms: List of transforms\n \n Returns:\n Target size as list, or None if not found\n \"\"\"\n if tfms is None:\n return None\n for tfm in tfms:\n if hasattr(tfm, 'pad_or_crop') and hasattr(tfm.pad_or_crop, 'target_shape'):\n return list(tfm.pad_or_crop.target_shape)\n return None\n\n\ndef _extract_standard_config(learn) -> dict:\n \"\"\"Extract config from standard MedDataBlock workflow.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Dictionary with extracted configuration\n \"\"\"\n from fastMONAI.vision_core import MedBase\n dls = learn.dls\n\n # Get preprocessing from MedBase class attributes\n apply_reorder = MedBase.apply_reorder\n target_spacing = MedBase.target_spacing\n\n # Extract item_tfms from DataLoaders pipeline\n item_tfms = []\n if hasattr(dls, 'after_item') and dls.after_item:\n item_tfms = list(dls.after_item.fs)\n\n # Extract size from PadOrCrop transform\n size = _extract_size_from_transforms(item_tfms)\n\n return {\n 'apply_reorder': apply_reorder,\n 'target_spacing': target_spacing,\n 'size': size,\n 'item_tfms': item_tfms,\n 'batch_size': dls.bs,\n 'patch_config': None,\n }\n\n\ndef _extract_patch_config(learn) -> dict:\n \"\"\"Extract config from MedPatchDataLoaders workflow.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Dictionary with extracted configuration including patch-specific params\n \"\"\"\n dls = learn.dls\n patch_config = getattr(dls, '_patch_config', None) or getattr(dls, 'patch_config', None)\n\n config = {\n 'apply_reorder': getattr(dls, '_apply_reorder', 
patch_config.apply_reorder if patch_config else False),\n 'target_spacing': getattr(dls, '_target_spacing', patch_config.target_spacing if patch_config else None),\n 'size': patch_config.patch_size if patch_config else None,\n 'item_tfms': getattr(dls, '_pre_patch_tfms', []) or [],\n 'batch_size': dls.bs,\n }\n\n # Add patch-specific params for logging\n if patch_config:\n config['patch_config'] = {\n 'patch_size': patch_config.patch_size,\n 'patch_overlap': patch_config.patch_overlap,\n 'samples_per_volume': patch_config.samples_per_volume,\n 'sampler_type': patch_config.sampler_type,\n 'label_probabilities': str(patch_config.label_probabilities) if patch_config.label_probabilities else None,\n 'queue_length': patch_config.queue_length,\n 'aggregation_mode': patch_config.aggregation_mode,\n 'padding_mode': patch_config.padding_mode,\n 'keep_largest_component': patch_config.keep_largest_component,\n }\n else:\n config['patch_config'] = None\n\n return config\n\n\ndef _extract_loss_name(learn) -> str:\n \"\"\"Extract loss function name from Learner.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Name of the loss function\n \"\"\"\n loss_func = learn.loss_func\n # Handle CustomLoss wrapper\n if hasattr(loss_func, 'loss_func'):\n inner = loss_func.loss_func\n return inner._get_name() if hasattr(inner, '_get_name') else inner.__class__.__name__\n return loss_func._get_name() if hasattr(loss_func, '_get_name') else loss_func.__class__.__name__\n\n\ndef _extract_model_name(learn) -> str:\n \"\"\"Extract model architecture name from Learner.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Name of the model architecture\n \"\"\"\n model = learn.model\n return model._get_name() if hasattr(model, '_get_name') else model.__class__.__name__"
"source": "#| export\ndef _detect_patch_workflow(dls) -> bool:\n \"\"\"Detect if DataLoaders are patch-based (MedPatchDataLoaders).\n \n Args:\n dls: DataLoaders instance\n \n Returns:\n True if dls is a MedPatchDataLoaders instance\n \"\"\"\n return hasattr(dls, 'patch_config') or hasattr(dls, '_patch_config')\n\n\ndef _extract_size_from_transforms(tfms) -> list | None:\n \"\"\"Extract target size from PadOrCrop transform if present.\n \n Args:\n tfms: List of transforms\n \n Returns:\n Target size as list, or None if not found\n \"\"\"\n if tfms is None:\n return None\n for tfm in tfms:\n if hasattr(tfm, 'pad_or_crop') and hasattr(tfm.pad_or_crop, 'target_shape'):\n return list(tfm.pad_or_crop.target_shape)\n return None\n\n\ndef _extract_standard_config(learn) -> dict:\n \"\"\"Extract config from standard MedDataBlock workflow.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Dictionary with extracted configuration\n \"\"\"\n from fastMONAI.vision_core import MedBase\n dls = learn.dls\n\n # Get preprocessing from MedBase class attributes\n apply_reorder = MedBase.apply_reorder\n target_spacing = MedBase.target_spacing\n\n # Extract item_tfms from DataLoaders pipeline\n item_tfms = []\n if hasattr(dls, 'after_item') and dls.after_item:\n item_tfms = list(dls.after_item.fs)\n\n # Extract size from PadOrCrop transform\n size = _extract_size_from_transforms(item_tfms)\n\n return {\n 'apply_reorder': apply_reorder,\n 'target_spacing': target_spacing,\n 'size': size,\n 'item_tfms': item_tfms,\n 'batch_size': dls.bs,\n 'patch_config': None,\n }\n\n\ndef _extract_patch_config(learn) -> dict:\n \"\"\"Extract config from MedPatchDataLoaders workflow.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Dictionary with extracted configuration including patch-specific params\n \"\"\"\n dls = learn.dls\n patch_config = getattr(dls, '_patch_config', None) or getattr(dls, 'patch_config', None)\n\n config = {\n 'apply_reorder': getattr(dls, '_apply_reorder', 
patch_config.apply_reorder if patch_config else False),\n 'target_spacing': getattr(dls, '_target_spacing', patch_config.target_spacing if patch_config else None),\n 'size': patch_config.patch_size if patch_config else None,\n 'item_tfms': getattr(dls, '_pre_patch_tfms', []) or [],\n 'batch_size': dls.bs,\n }\n\n # Add patch-specific params for logging\n if patch_config:\n config['patch_config'] = {\n 'patch_size': patch_config.patch_size,\n 'patch_overlap': patch_config.patch_overlap,\n 'samples_per_volume': patch_config.samples_per_volume,\n 'sampler_type': patch_config.sampler_type,\n 'label_probabilities': str(patch_config.label_probabilities) if patch_config.label_probabilities else None,\n 'queue_length': patch_config.queue_length,\n 'aggregation_mode': patch_config.aggregation_mode,\n 'padding_mode': patch_config.padding_mode,\n 'keep_largest_component': patch_config.keep_largest_component,\n 'preprocessed': patch_config.preprocessed,\n }\n else:\n config['patch_config'] = None\n\n return config\n\n\ndef _extract_loss_name(learn) -> str:\n \"\"\"Extract loss function name from Learner.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Name of the loss function\n \"\"\"\n loss_func = learn.loss_func\n # Handle CustomLoss wrapper\n if hasattr(loss_func, 'loss_func'):\n inner = loss_func.loss_func\n return inner._get_name() if hasattr(inner, '_get_name') else inner.__class__.__name__\n return loss_func._get_name() if hasattr(loss_func, '_get_name') else loss_func.__class__.__name__\n\n\ndef _extract_model_name(learn) -> str:\n \"\"\"Extract model architecture name from Learner.\n \n Args:\n learn: fastai Learner instance\n \n Returns:\n Name of the model architecture\n \"\"\"\n model = learn.model\n return model._get_name() if hasattr(model, '_get_name') else model.__class__.__name__"
},
{
"cell_type": "code",
Expand Down
Loading