Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions fastMONAI/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
'fastMONAI/dataset_info.py'),
'fastMONAI.dataset_info.get_class_weights': ( 'dataset_info.html#get_class_weights',
'fastMONAI/dataset_info.py'),
'fastMONAI.dataset_info.preprocess_dataset': ( 'dataset_info.html#preprocess_dataset',
'fastMONAI/dataset_info.py'),
'fastMONAI.dataset_info.suggest_patch_size': ( 'dataset_info.html#suggest_patch_size',
'fastMONAI/dataset_info.py')},
'fastMONAI.external_data': { 'fastMONAI.external_data.MURLs': ('external_data.html#murls', 'fastMONAI/external_data.py'),
Expand Down Expand Up @@ -139,6 +141,28 @@
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.CustomDictTransform.tio_transform': ( 'vision_augment.html#customdicttransform.tio_transform',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation': ( 'vision_augment.html#gpupatchaugmentation',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation.__call__': ( 'vision_augment.html#gpupatchaugmentation.__call__',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation.__init__': ( 'vision_augment.html#gpupatchaugmentation.__init__',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation.__repr__': ( 'vision_augment.html#gpupatchaugmentation.__repr__',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_affine': ( 'vision_augment.html#gpupatchaugmentation._apply_affine',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_anisotropy': ( 'vision_augment.html#gpupatchaugmentation._apply_anisotropy',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_blur': ( 'vision_augment.html#gpupatchaugmentation._apply_blur',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_flip': ( 'vision_augment.html#gpupatchaugmentation._apply_flip',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_gamma': ( 'vision_augment.html#gpupatchaugmentation._apply_gamma',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_intensity_scale': ( 'vision_augment.html#gpupatchaugmentation._apply_intensity_scale',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.GpuPatchAugmentation._apply_noise': ( 'vision_augment.html#gpupatchaugmentation._apply_noise',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.NormalizeIntensity': ( 'vision_augment.html#normalizeintensity',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.NormalizeIntensity.__init__': ( 'vision_augment.html#normalizeintensity.__init__',
Expand Down Expand Up @@ -289,12 +313,18 @@
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._TioRandomIntensityScale.apply_transform': ( 'vision_augment.html#_tiorandomintensityscale.apply_transform',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._build_rotation_matrix_3d': ( 'vision_augment.html#_build_rotation_matrix_3d',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._compute_patch_aug_params': ( 'vision_augment.html#_compute_patch_aug_params',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._create_ellipsoid_mask': ( 'vision_augment.html#_create_ellipsoid_mask',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._foreground_masking': ( 'vision_augment.html#_foreground_masking',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.gpu_patch_augmentations': ( 'vision_augment.html#gpu_patch_augmentations',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.suggest_patch_augmentations': ( 'vision_augment.html#suggest_patch_augmentations',
'fastMONAI/vision_augmentation.py')},
'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
Expand Down
143 changes: 141 additions & 2 deletions fastMONAI/dataset_info.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/08_dataset_info.ipynb.

# %% auto #0
__all__ = ['MedDataset', 'suggest_patch_size', 'get_class_weights']
__all__ = ['MedDataset', 'suggest_patch_size', 'preprocess_dataset', 'get_class_weights']

# %% ../nbs/08_dataset_info.ipynb #027f016a-a80c-4842-b9dc-0bddb358a00c
from .vision_core import *
from .vision_plot import find_max_slice

from sklearn.utils.class_weight import compute_class_weight
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm
from pathlib import Path
import torchio as tio
import pandas as pd
import numpy as np
import torch
Expand Down Expand Up @@ -548,6 +550,143 @@ def round_to_divisor(val, div):

return patch_size

# %% ../nbs/08_dataset_info.ipynb #mbn5svtmzkh
def preprocess_dataset(df, img_col, mask_col=None, output_dir='preprocessed',
                       target_spacing=None, apply_reorder=True, transforms=None,
                       max_workers=4, skip_existing=True):
    """Preprocess dataset to disk and update DataFrame path columns in-place.

    Processes images (and optionally masks) through a transform pipeline,
    saves them under `output_dir`, then updates df[img_col] and df[mask_col]
    in-place to point to the preprocessed files.

    Transform pipeline order:
        CopyAffine (if masks) -> ToCanonical (if apply_reorder)
        -> Resample (if target_spacing) -> user transforms

    Args:
        df: DataFrame with file paths.
        img_col: Column name for image paths.
        mask_col: Optional column name for mask paths.
        output_dir: Output directory. Creates images/ and masks/ subdirectories.
        target_spacing: Target voxel spacing for resampling (e.g., [1.0, 1.0, 1.0]).
        apply_reorder: Whether to reorder to RAS+ canonical orientation.
        transforms: Additional TorchIO or fastMONAI transforms to apply after
            reordering and resampling.
        max_workers: Number of parallel workers. Each worker loads a full 3D
            volume into memory, so reduce for large volumes.
        skip_existing: Skip files that already exist on disk (with size > 0).

    Raises:
        ValueError: If df is empty, a named column is missing, or two rows
            share the same image/mask file name (outputs are keyed by basename
            only, so duplicates would silently overwrite each other).

    Note:
        Path columns are rewritten for *every* row, including rows whose
        preprocessing failed — check the printed failure summary before
        relying on the updated paths.
    """
    import warnings  # local import so the function does not rely on a module-level import

    # ---- Input validation -------------------------------------------------
    if len(df) == 0:
        raise ValueError("DataFrame is empty")
    if img_col not in df.columns:
        raise ValueError(f"Column '{img_col}' not found in DataFrame")
    if mask_col is not None and mask_col not in df.columns:
        raise ValueError(f"Column '{mask_col}' not found in DataFrame")

    def _check_unique_names(paths, label):
        # Single-pass duplicate detection (outputs are keyed by basename only).
        seen, dupes = set(), set()
        for name in (Path(p).name for p in paths):
            (dupes if name in seen else seen).add(name)
        if dupes:
            raise ValueError(f"Duplicate {label} file names: {dupes}")

    _check_unique_names(df[img_col], 'image')
    if mask_col is not None:
        _check_unique_names(df[mask_col], 'mask')

    # ---- Build transform pipeline (canonical order) -----------------------
    all_tfms = []
    if mask_col is not None:
        # Masks must share the image affine before any spatial transform.
        all_tfms.append(tio.CopyAffine(target='image'))
    if apply_reorder:
        all_tfms.append(tio.ToCanonical())
    if target_spacing is not None:
        all_tfms.append(tio.Resample(target_spacing))
    if transforms:
        # fastMONAI transforms wrap a TorchIO transform in `.tio_transform`;
        # plain TorchIO transforms are used as-is.
        all_tfms.extend([getattr(t, 'tio_transform', t) for t in transforms])
    pipeline = tio.Compose(all_tfms) if all_tfms else None

    # ---- Create output directories ----------------------------------------
    output_dir = Path(output_dir)
    img_dir = output_dir / 'images'
    img_dir.mkdir(parents=True, exist_ok=True)
    if mask_col is not None:
        mask_dir = output_dir / 'masks'
        mask_dir.mkdir(parents=True, exist_ok=True)

    # ---- Build work items, honoring skip_existing -------------------------
    work_items = []
    skipped = 0
    for idx in range(len(df)):
        img_path = df[img_col].iloc[idx]
        out_img = img_dir / Path(img_path).name

        mask_path = df[mask_col].iloc[idx] if mask_col is not None else None
        out_mask = (mask_dir / Path(mask_path).name) if mask_col is not None else None

        if skip_existing:
            # A zero-byte file is treated as a failed previous write and redone.
            img_ok = out_img.exists() and out_img.stat().st_size > 0
            mask_ok = out_mask is None or (out_mask.exists() and out_mask.stat().st_size > 0)
            if img_ok and mask_ok:
                skipped += 1
                continue

        work_items.append({
            'idx': idx, 'img_path': img_path, 'mask_path': mask_path,
            'out_img': out_img, 'out_mask': out_mask,
        })

    # ---- Process cases ----------------------------------------------------
    processed = 0
    failed = 0
    failed_cases = []

    def _atomic_save(image, dest):
        # Write to a hidden temp file in the same directory (keeps the NIfTI
        # extension so the format can be inferred), then replace into place.
        # Path.replace is atomic on POSIX and overwrites on Windows, unlike
        # os.rename, and avoids depending on the os module.
        tmp = dest.parent / f'.tmp_{dest.name}'
        image.save(str(tmp))
        tmp.replace(dest)

    def _process_case(item):
        # Load, transform, and atomically write one image (and optional mask).
        subject_dict = {'image': tio.ScalarImage(item['img_path'])}
        if item['mask_path'] is not None:
            subject_dict['mask'] = tio.LabelMap(item['mask_path'])

        subject = tio.Subject(**subject_dict)
        if pipeline is not None:
            subject = pipeline(subject)

        _atomic_save(subject['image'], item['out_img'])
        if item['out_mask'] is not None:
            _atomic_save(subject['mask'], item['out_mask'])

    if work_items:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(_process_case, item): item for item in work_items}
            for future in tqdm(as_completed(futures), total=len(futures),
                               desc='Preprocessing'):
                item = futures[future]
                try:
                    future.result()
                    processed += 1
                except Exception as e:
                    failed += 1
                    failed_cases.append(Path(item['img_path']).name)
                    warnings.warn(f"Failed to process {item['img_path']}: {e}")

    # ---- Update DataFrame in-place ----------------------------------------
    # NOTE: paths are repointed for all rows, including failed ones.
    df[img_col] = [str(img_dir / Path(p).name) for p in df[img_col]]
    if mask_col is not None:
        df[mask_col] = [str(mask_dir / Path(p).name) for p in df[mask_col]]

    print(f"Preprocessing complete: {processed} processed, {skipped} skipped, {failed} failed")
    if failed_cases:
        print(f"Failed cases: {failed_cases}")

# %% ../nbs/08_dataset_info.ipynb #9b81f6e8-abd7-4bf6-be4c-4118986c308a
def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
"""Calculates and returns the class weights.
Expand Down
Loading