From df92a486547ca0c58ef6d15d68a97788b13e71ee Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 1 Jun 2022 17:12:02 +0000 Subject: [PATCH 01/29] transform works locally, coordinatess need to be propagated --- hi-ml-histopathology/.vscode/launch.json | 4 ++- hi-ml-histopathology/environment.yml | 4 ++- .../histopathology/datamodules/base_module.py | 31 ++++++++++++------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index 88a627a75..87c971032 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -47,7 +47,9 @@ "--model=histopathology.SlidesPandaImageNetMIL", "--pl_limit_batches=2", "--crossval_count=0", - "--batch_size=2" + "--batch_size=2", + "--max_bag_size=10", + "--max_bag_size_inf=10" ], "console": "integratedTerminal", }, diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index ca3dd2fbe..2bbdb9a34 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -37,7 +37,9 @@ dependencies: - jupyter-client==6.1.5 - lightning-bolts==0.4.0 - mlflow==1.17.0 - - monai==0.8.0 + # monai==0.8.0 + # Branch containing transform with coordinates + - git+https://github.com/Project-MONAI/MONAI.git@dev - more-itertools==8.10.0 - mypy==0.931 - mypy-extensions==0.4.3 diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 1fd31e11b..fcde88478 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -23,6 +23,7 @@ from monai.transforms.compose import Compose from monai.transforms.io.dictionary import LoadImaged from monai.apps.pathology.transforms import TileOnGridd +from monai.transforms import RandGridPatchd from monai.data.image_reader import WSIReader 
_SlidesOrTilesDataset = TypeVar('_SlidesOrTilesDataset', SlidesDataset, TilesDataset) @@ -295,21 +296,29 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas level=self.level, image_only=True, ), - TileOnGridd( - keys=slides_dataset.IMAGE_COLUMN, - tile_count=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, - tile_size=self.tile_size, - step=self.step, - random_offset=self.random_offset if stage == ModelKey.TRAIN else False, - pad_full=self.pad_full, - background_val=self.background_val, - filter_mode=self.filter_mode, - return_list_of_dicts=True, + RandGridPatchd( + keys=[slides_dataset.IMAGE_COLUMN], + patch_size=(self.tile_size, self.tile_size), + num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, + sort_fn=self.filter_mode, + pad_mode="constant", + constant_values=self.background_val, ), + #TileOnGridd( + # keys=slides_dataset.IMAGE_COLUMN, + # tile_count=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, + # tile_size=self.tile_size, + # step=self.step, + # random_offset=self.random_offset if stage == ModelKey.TRAIN else False, + # pad_full=self.pad_full, + # background_val=self.background_val, + # filter_mode=self.filter_mode, + # return_list_of_dicts=True, + #), ] ) if self.transforms_dict and self.transforms_dict[stage]: - transforms = Compose([base_transform, self.transforms_dict[stage]]).flatten() + transforms = Compose([base_transform, self.transforms_dict[stage]]).flatten() # type: ignore else: transforms = base_transform return Dataset(slides_dataset, transforms) From a7754d38c70c7341b10f99670fbdde78a1b40e93 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 7 Jun 2022 09:40:32 +0000 Subject: [PATCH 02/29] switch branch --- .../src/histopathology/datamodules/base_module.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py 
b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index fcde88478..8dc5ff6f0 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -304,7 +304,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas pad_mode="constant", constant_values=self.background_val, ), - #TileOnGridd( + # TileOnGridd( # keys=slides_dataset.IMAGE_COLUMN, # tile_count=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, # tile_size=self.tile_size, @@ -314,7 +314,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas # background_val=self.background_val, # filter_mode=self.filter_mode, # return_list_of_dicts=True, - #), + # ), ] ) if self.transforms_dict and self.transforms_dict[stage]: From ce04dabf97735373430b231e64880a35760b3c41 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 7 Jun 2022 09:47:02 +0000 Subject: [PATCH 03/29] fix merge --- hi-ml-histopathology/.vscode/launch.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index 4388e9e82..1cebb3ff2 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -48,10 +48,8 @@ "--pl_limit_batches=2", "--crossval_count=0", "--batch_size=2", -<<<<<<< HEAD "--max_bag_size=10", - "--max_bag_size_inf=10" -======= + "--max_bag_size_inf=10", "--num_top_slides=2", "--num_top_tiles=2" ], @@ -71,7 +69,6 @@ "--max_bag_size_inf=4", "--num_top_slides=2", "--num_top_tiles=2" ->>>>>>> main ], "console": "integratedTerminal", }, From be25cf611004df06f6a2ba3288e5a5c221621077 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 7 Jun 2022 12:54:57 +0000 Subject: [PATCH 04/29] adding coordinates to the batch --- .../src/histopathology/models/deepmil.py | 26 +++++++++++++++++++ 
.../src/histopathology/utils/naming.py | 2 ++ 2 files changed, 28 insertions(+) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 98f23ea87..ea196bc72 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -337,6 +337,10 @@ def get_bag_label(labels: Tensor) -> Tensor: def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: # WARNING: This is a dummy input until we figure out tiles coordinates retrieval in the next iteration. + # batch returns ['image', , , + # , , , + # , 'original_spatial_shape', 'slices', 'patch_size', 'num_patches', 'offset'] + # import pdb; pdb.set_trace() bag_sizes = [tiles.shape[0] for tiles in batch[SlideKey.IMAGE]] results.update( { @@ -352,3 +356,25 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> ], } ) + if all(key in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_SIZE]): + results.update( + { + ResultsKey.TILE_TOP: [ + [batch[SlideKey.OFFSET][i][0]] for i, _ in enumerate(batch[SlideKey.OFFSET]) + ], + ResultsKey.TILE_LEFT: [ + [batch[SlideKey.OFFSET][i][1]] for i, _ in enumerate(batch[SlideKey.OFFSET]) + ], + ResultsKey.TILE_RIGHT: [ + [batch[SlideKey.OFFSET][i][1]] + batch[SlideKey.PATCH_SIZE][i][1] + for i, _ in enumerate(batch[SlideKey.OFFSET]) + ], + ResultsKey.TILE_BOTTOM: [ + [batch[SlideKey.OFFSET][i][0]] + batch[SlideKey.PATCH_SIZE][i][0] + for i, _ in enumerate(batch[SlideKey.OFFSET]) + ], + + } + ) + else: + rank_zero_warn(message="Offset and patch size not found in the batch, make sure to use RandGridPatch.") diff --git a/hi-ml-histopathology/src/histopathology/utils/naming.py b/hi-ml-histopathology/src/histopathology/utils/naming.py index 120358839..fcbb99b95 100644 --- a/hi-ml-histopathology/src/histopathology/utils/naming.py +++ 
b/hi-ml-histopathology/src/histopathology/utils/naming.py @@ -19,6 +19,8 @@ class SlideKey(str, Enum): FOREGROUND_THRESHOLD = 'foreground_threshold' METADATA = 'metadata' LOCATION = 'location' + OFFSET = 'offset' + PATCH_SIZE = 'patch_size' class TileKey(str, Enum): From 8183fb6ba53b4d5724d96bf8693fca4a4072d69e Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Thu, 9 Jun 2022 13:45:53 +0000 Subject: [PATCH 05/29] sfixing collate --- hi-ml-histopathology/.vscode/launch.json | 8 ++-- .../histopathology/datamodules/base_module.py | 10 ++--- .../src/histopathology/models/deepmil.py | 6 +-- .../src/histopathology/utils/naming.py | 5 ++- .../src/histopathology/utils/wsi_utils.py | 42 +++++++++++++++---- 5 files changed, 51 insertions(+), 20 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index 1cebb3ff2..d43e38510 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -42,16 +42,18 @@ "name": "Python: Run SlidesPandaImageNetMIL locally", "type": "python", "request": "launch", + "justMyCode": false, "program": "${workspaceFolder}/../hi-ml/src/health_ml/runner.py", "args": [ "--model=histopathology.SlidesPandaImageNetMIL", "--pl_limit_batches=2", "--crossval_count=0", "--batch_size=2", - "--max_bag_size=10", - "--max_bag_size_inf=10", + "--max_bag_size=5", + "--max_bag_size_inf=5", "--num_top_slides=2", - "--num_top_tiles=2" + "--num_top_tiles=2", + "--max_num_gpus=1", ], "console": "integratedTerminal", }, diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 8dc5ff6f0..8e52e40ea 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -15,7 +15,7 @@ from health_ml.utils.bag_utils import BagDataset, multibag_collate from health_ml.utils.common_utils import 
_create_generator -from histopathology.utils.wsi_utils import image_collate +from histopathology.utils.wsi_utils import array_collate from histopathology.models.transforms import LoadTilesBatchd from histopathology.datasets.base_dataset import SlidesDataset, TilesDataset from histopathology.utils.naming import ModelKey @@ -99,13 +99,13 @@ def get_splits(self) -> Tuple[_SlidesOrTilesDataset, _SlidesOrTilesDataset, _Sli raise NotImplementedError def train_dataloader(self) -> DataLoader: - return self._get_dataloader(self.train_dataset, shuffle=True, stage=ModelKey.TRAIN, **self.dataloader_kwargs) + return self.train_dataloader(self.train_dataset, shuffle=True, **self.dataloader_kwargs) def val_dataloader(self) -> DataLoader: - return self._get_dataloader(self.val_dataset, shuffle=False, stage=ModelKey.VAL, **self.dataloader_kwargs) + return self.validation_dataloader(self.val_dataset, shuffle=False, **self.dataloader_kwargs) def test_dataloader(self) -> DataLoader: - return self._get_dataloader(self.test_dataset, shuffle=False, stage=ModelKey.TEST, **self.dataloader_kwargs) + return self.test_dataloader(self.test_dataset, shuffle=False, **self.dataloader_kwargs) class TilesDataModule(HistoDataModule[TilesDataset]): @@ -330,7 +330,7 @@ def _get_dataloader(self, dataset: SlidesDataset, stage: ModelKey, shuffle: bool return DataLoader( transformed_slides_dataset, batch_size=self.batch_size, - collate_fn=image_collate, + collate_fn=array_collate, shuffle=shuffle, generator=generator, **dataloader_kwargs, diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index ea196bc72..61fb93899 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -340,8 +340,8 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> # batch returns ['image', , , # , , , # , 'original_spatial_shape', 'slices', 
'patch_size', 'num_patches', 'offset'] - # import pdb; pdb.set_trace() bag_sizes = [tiles.shape[0] for tiles in batch[SlideKey.IMAGE]] + results.update( { ResultsKey.SLIDE_ID: [ @@ -366,11 +366,11 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> [batch[SlideKey.OFFSET][i][1]] for i, _ in enumerate(batch[SlideKey.OFFSET]) ], ResultsKey.TILE_RIGHT: [ - [batch[SlideKey.OFFSET][i][1]] + batch[SlideKey.PATCH_SIZE][i][1] + [batch[SlideKey.OFFSET][i][1] + batch[SlideKey.PATCH_SIZE][i][1]] for i, _ in enumerate(batch[SlideKey.OFFSET]) ], ResultsKey.TILE_BOTTOM: [ - [batch[SlideKey.OFFSET][i][0]] + batch[SlideKey.PATCH_SIZE][i][0] + [batch[SlideKey.OFFSET][i][0] + batch[SlideKey.PATCH_SIZE][i][0]] for i, _ in enumerate(batch[SlideKey.OFFSET]) ], diff --git a/hi-ml-histopathology/src/histopathology/utils/naming.py b/hi-ml-histopathology/src/histopathology/utils/naming.py index fcbb99b95..0f589e08e 100644 --- a/hi-ml-histopathology/src/histopathology/utils/naming.py +++ b/hi-ml-histopathology/src/histopathology/utils/naming.py @@ -4,6 +4,7 @@ # ------------------------------------------------------------------------------------------ from enum import Enum +from monai.utils import WSIPatchKeys class SlideKey(str, Enum): @@ -19,8 +20,10 @@ class SlideKey(str, Enum): FOREGROUND_THRESHOLD = 'foreground_threshold' METADATA = 'metadata' LOCATION = 'location' + PATCH_SIZE = WSIPatchKeys.SIZE = 'patch_size' + PATCH_LOCATION = WSIPatchKeys.LOCATION = 'patch_location' OFFSET = 'offset' - PATCH_SIZE = 'patch_size' + SHAPE = 'original_spatial_shape' class TileKey(str, Enum): diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index dfefbee38..7b4e7e271 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -1,22 +1,48 @@ import torch import numpy as np +import logging from typing import Any, 
List from histopathology.utils.naming import SlideKey from health_ml.utils.bag_utils import multibag_collate +from monai.utils import WSIPatchKeys +slide_metadata_keys = [ + SlideKey.IMAGE_PATH, + SlideKey.LABEL, + SlideKey.MASK, + SlideKey.METADATA, + SlideKey.SLIDE_ID, + SlideKey.MASK_PATH, + WSIPatchKeys.COUNT, + SlideKey.SHAPE, + SlideKey.OFFSET +] -def image_collate(batch: List) -> Any: + +def array_collate(batch: List) -> Any: """ - Combine instances from a list of dicts into a single dict, by stacking them along first dim + Combine instances from a list of dicts into a single dict, by stacking arrays along first dim [{'image' : 3xHxW}, {'image' : 3xHxW}, {'image' : 3xHxW}...] - > {'image' : Nx3xHxW} - followed by the default collate which will form a batch BxNx3xHxW. - The list of dicts refers to the the list of tiles produced by the TileOnGridd transform applied on a WSI. + followed by the default collate which will form a batch BxNx3xHxW. It also convert some values to tensors. + The list of dicts refers to the the list of tiles produced by GridPatch transform applied on a WSI. 
""" + collate_keys = [] + constant_keys = slide_metadata_keys + for key in batch[0][0].keys(): + if key not in slide_metadata_keys: + if type(batch[0][0][key]) == np.ndarray: + collate_keys.append(key) + else: + logging.warning(f'Only np.ndarray are collated - {key} value will be taken from first patch') + constant_keys.append(key) + tensor_keys = collate_keys + ['SlideKey.LABEL'] - for i, item in enumerate(batch): - data = item[0] - data[SlideKey.IMAGE] = torch.tensor(np.array([ix[SlideKey.IMAGE] for ix in item])) - data[SlideKey.LABEL] = torch.tensor(data[SlideKey.LABEL]) + for i, patch_data in enumerate(batch): + data = patch_data[0] + for key in collate_keys: + data[key] = np.array([ix[key] for ix in patch_data]) + for key in tensor_keys: + data[key] = torch.tensor(data[key]) batch[i] = data return multibag_collate(batch) From 1a93d3aacf4c1b482cd9f64bcf42c3fd83b8a4de Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Thu, 9 Jun 2022 16:08:49 +0000 Subject: [PATCH 06/29] updating transform parameters --- .../histopathology/datamodules/base_module.py | 61 +++++++++---------- .../src/histopathology/utils/naming.py | 4 +- .../src/histopathology/utils/wsi_utils.py | 3 +- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 8e52e40ea..127155ac0 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -22,8 +22,7 @@ from monai.transforms.compose import Compose from monai.transforms.io.dictionary import LoadImaged -from monai.apps.pathology.transforms import TileOnGridd -from monai.transforms import RandGridPatchd +from monai.transforms import RandGridPatchd, GridPatchd from monai.data.image_reader import WSIReader _SlidesOrTilesDataset = TypeVar('_SlidesOrTilesDataset', SlidesDataset, TilesDataset) @@ -99,14 +98,13 @@ 
def get_splits(self) -> Tuple[_SlidesOrTilesDataset, _SlidesOrTilesDataset, _Sli raise NotImplementedError def train_dataloader(self) -> DataLoader: - return self.train_dataloader(self.train_dataset, shuffle=True, **self.dataloader_kwargs) + return self._get_dataloader(self.train_dataset, shuffle=True, stage=ModelKey.TRAIN, **self.dataloader_kwargs) def val_dataloader(self) -> DataLoader: - return self.validation_dataloader(self.val_dataset, shuffle=False, **self.dataloader_kwargs) + return self._get_dataloader(self.val_dataset, shuffle=False, stage=ModelKey.VAL, **self.dataloader_kwargs) def test_dataloader(self) -> DataLoader: - return self.test_dataloader(self.test_dataset, shuffle=False, **self.dataloader_kwargs) - + return self._get_dataloader(self.test_dataset, shuffle=False, stage=ModelKey.TEST, **self.dataloader_kwargs) class TilesDataModule(HistoDataModule[TilesDataset]): """Base class to load the tiles of a dataset as train, val, test sets""" @@ -251,6 +249,8 @@ def __init__( pad_full: Optional[bool] = False, background_val: Optional[int] = 255, filter_mode: Optional[str] = "min", + overlap: Optional[float] = 0, + intensity_threshold: Optional[float] = 0, **kwargs: Any, ) -> None: """ @@ -258,19 +258,20 @@ def __init__( this param is passed to the LoadImaged monai transform that loads a WSI with cucim backend :param tile_size: size of the square tile, defaults to 224 this param is passed to TileOnGridd monai transform for tiling on the fly. - :param step: step size to create overlapping tiles, defaults to None (same as tile_size) - Use a step < tile_size to create overlapping tiles, analogousely a step > tile_size will skip some chunks in - the wsi. This param is passed to TileOnGridd monai transform for tiling on the fly. :param random_offset: randomize position of the grid, instead of starting from the top-left corner, defaults to True. This param is passed to TileOnGridd monai transform for tiling on the fly. 
:param pad_full: pad image to the size evenly divisible by tile_size, defaults to False - This param is passed to TileOnGridd monai transform for tiling on the fly. + This param is passed to monai transform for tiling on the fly. :param background_val: the background constant to ignore background tiles (e.g. 255 for white background), defaults to 255. This param is passed to TileOnGridd monai transform for tiling on the fly. :param filter_mode: mode must be in ["min", "max", "random"]. If total number of tiles is greater than tile_count, then sort by intensity sum, and take the smallest (for min), largest (for max) or random (for - random) subset, defaults to "min" (which assumes background is high value). This param is passed to TileOnGridd + random) subset, defaults to "min" (which assumes background is high value). This param is passed to monai transform for tiling on the fly. + : param overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. + : param intensity_threshold: a value to keep only the patches whose sum of intensities are less than the + threshold. Defaults to no filtering. 
""" super().__init__(**kwargs) self.level = level @@ -280,43 +281,37 @@ def __init__( self.pad_full = pad_full self.background_val = background_val self.filter_mode = filter_mode - # TileOnGridd transform expects None to select all foreground tile so we hardcode max_bag_size and + # Tiling transform expects None to select all foreground tile so we hardcode max_bag_size and # max_bag_size_inf to None if set to 0 self.max_bag_size = None if self.max_bag_size == 0 else self.max_bag_size # type: ignore self.max_bag_size_inf = None if self.max_bag_size_inf == 0 else self.max_bag_size_inf # type: ignore + self.overlap = overlap + self.intensity_threshold = intensity_threshold def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Dataset: - base_transform = Compose( - [ - LoadImaged( + load_image_transform = LoadImaged( keys=slides_dataset.IMAGE_COLUMN, - reader=WSIReader, + reader=WSIReader, # type: ignore backend="cuCIM", dtype=np.uint8, level=self.level, image_only=True, - ), - RandGridPatchd( + ) + max_offset = None if (self.random_offset and stage == ModelKey.TRAIN) else 0 + random_grid_transform = RandGridPatchd( keys=[slides_dataset.IMAGE_COLUMN], - patch_size=(self.tile_size, self.tile_size), + patch_size=[self.tile_size, self.tile_size], # type: ignore num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, sort_fn=self.filter_mode, pad_mode="constant", constant_values=self.background_val, - ), - # TileOnGridd( - # keys=slides_dataset.IMAGE_COLUMN, - # tile_count=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, - # tile_size=self.tile_size, - # step=self.step, - # random_offset=self.random_offset if stage == ModelKey.TRAIN else False, - # pad_full=self.pad_full, - # background_val=self.background_val, - # filter_mode=self.filter_mode, - # return_list_of_dicts=True, - # ), - ] - ) + overlap=self.overlap, + threshold=self.intensity_threshold, + max_offset = max_offset, + pad_full = 
self.pad_full + ) + base_transform = Compose([load_image_transform, random_grid_transform]) + if self.transforms_dict and self.transforms_dict[stage]: transforms = Compose([base_transform, self.transforms_dict[stage]]).flatten() # type: ignore else: diff --git a/hi-ml-histopathology/src/histopathology/utils/naming.py b/hi-ml-histopathology/src/histopathology/utils/naming.py index 0f589e08e..6cbfe3841 100644 --- a/hi-ml-histopathology/src/histopathology/utils/naming.py +++ b/hi-ml-histopathology/src/histopathology/utils/naming.py @@ -20,8 +20,8 @@ class SlideKey(str, Enum): FOREGROUND_THRESHOLD = 'foreground_threshold' METADATA = 'metadata' LOCATION = 'location' - PATCH_SIZE = WSIPatchKeys.SIZE = 'patch_size' - PATCH_LOCATION = WSIPatchKeys.LOCATION = 'patch_location' + PATCH_SIZE = WSIPatchKeys.SIZE # 'patch_size' + PATCH_LOCATION = WSIPatchKeys.LOCATION # 'patch_location' OFFSET = 'offset' SHAPE = 'original_spatial_shape' diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 7b4e7e271..96dcb09db 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -15,6 +15,7 @@ SlideKey.SLIDE_ID, SlideKey.MASK_PATH, WSIPatchKeys.COUNT, + SlideKey.PATCH_SIZE, #TODO: remove in case we want to allow patches of different sizes from the same slide SlideKey.SHAPE, SlideKey.OFFSET ] @@ -36,7 +37,7 @@ def array_collate(batch: List) -> Any: else: logging.warning(f'Only np.ndarray are collated - {key} value will be taken from first patch') constant_keys.append(key) - tensor_keys = collate_keys + ['SlideKey.LABEL'] + tensor_keys = collate_keys + [SlideKey.LABEL] for i, patch_data in enumerate(batch): data = patch_data[0] From bbaa5fa39f82b8b0cbf1e7003ad960ee1074afa6 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Mon, 13 Jun 2022 16:45:58 +0000 Subject: [PATCH 07/29] method that update slide results updated, tiles 
mthod to be refactored for homogeneity --- hi-ml-histopathology/environment.yml | 1 - .../histopathology/datamodules/base_module.py | 8 +- .../src/histopathology/models/deepmil.py | 113 +++++++++++------- .../src/histopathology/utils/naming.py | 4 +- 4 files changed, 77 insertions(+), 49 deletions(-) diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index 95d466dc9..709706d61 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -39,7 +39,6 @@ dependencies: - jupyter-client==6.1.5 - lightning-bolts==0.4.0 - mlflow==1.17.0 - # monai==0.8.0 # Branch containing transform with coordinates - git+https://github.com/Project-MONAI/MONAI.git@dev - more-itertools==8.10.0 diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 127155ac0..ca18b22d8 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -260,8 +260,6 @@ def __init__( this param is passed to TileOnGridd monai transform for tiling on the fly. :param random_offset: randomize position of the grid, instead of starting from the top-left corner, defaults to True. This param is passed to TileOnGridd monai transform for tiling on the fly. - :param pad_full: pad image to the size evenly divisible by tile_size, defaults to False - This param is passed to monai transform for tiling on the fly. :param background_val: the background constant to ignore background tiles (e.g. 255 for white background), defaults to 255. This param is passed to TileOnGridd monai transform for tiling on the fly. :param filter_mode: mode must be in ["min", "max", "random"]. 
If total number of tiles is greater than @@ -278,7 +276,6 @@ def __init__( self.tile_size = tile_size self.step = step self.random_offset = random_offset - self.pad_full = pad_full self.background_val = background_val self.filter_mode = filter_mode # Tiling transform expects None to select all foreground tile so we hardcode max_bag_size and @@ -305,10 +302,9 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas sort_fn=self.filter_mode, pad_mode="constant", constant_values=self.background_val, - overlap=self.overlap, + overlap=self.overlap, #type: ignore threshold=self.intensity_threshold, - max_offset = max_offset, - pad_full = self.pad_full + max_offset=max_offset, ) base_transform = Compose([load_image_transform, random_grid_transform]) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 61fb93899..861796ad4 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -5,6 +5,7 @@ from typing import Callable, Dict, List, Optional, Sequence, Tuple from pytorch_lightning.utilities.warnings import rank_zero_warn +import numpy as np import torch from pytorch_lightning import LightningModule @@ -335,46 +336,78 @@ def get_bag_label(labels: Tensor) -> Tensor: # SlidesDataModule attributes a single label to a bag of tiles already no need to do majority voting return labels + @staticmethod + def get_empty_lists(shape: int) -> List: + return [None] * shape + + @staticmethod + def get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: List) -> Tuple[int, int, int, int]: + """ computing absolute patch coordinate """ + # PATCH_LOCATION is expected to have shape [z, y, x] - here we assume 2d images + top = slide_offset[0] + patch_location[1] + bottom = slide_offset[0] + patch_location[1] + patch_size[0] + left = slide_offset[1] + patch_location[2] + right = 
slide_offset[1] + patch_location[2] + patch_size[1] + return top, bottom, left, right + + @staticmethod + def expand_slide_constant_metadata(id, path, n_patches): + """Duplicate metadata that is patch invariant to match the shape of other arrays""" + slide_id = id * n_patches + image_paths = path * n_patches + tile_id = [f"{id}_{tile_id}" for tile_id in range(n_patches)] + return slide_id, image_paths, tile_id + + def get_slide_patch_coordinates( + self, slide_offset: List, patches_location: List, patch_size: List + ) -> List[List, List, List, List]: + """ computing absolute coordinates for all patches in a slide""" + top, bottom, left, right = self.get_empty_lists(len(patches_location)) + for i, location in enumerate(patches_location): + top[i], bottom[i], left[i], right[i] = self.get_patch_coordinate(slide_offset, location, patch_size) + return top, bottom, left, right + + def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) -> Dict: + """compute patch-dependent and patch-invariante metadata for a single slide """ + offset = batch[SlideKey.OFFSET][index] + patches_location = batch[SlideKey.PATCH_LOCATION][index] + patch_size = batch[SlideKey.PATCH_SIZE][index] + n_patches = len(patches_location) + id = batch[SlideKey.SLIDE_ID][index] + path = batch[SlideKey.IMAGE_PATH][index] + + top, bottom, left, right = self.get_slide_patch_coordinates(offset, patches_location, patch_size) + slide_id, image_paths, tile_id = self.expand_slide_constant_metadata(id, path, n_patches) + + metadata_dict[ResultsKey.TILE_TOP] = top + metadata_dict[ResultsKey.TILE_BOTTOM] = bottom + metadata_dict[ResultsKey.TILE_LEFT] = left + metadata_dict[ResultsKey.TILE_RIGHT] = right + metadata_dict[ResultsKey.SLIDE_ID] = slide_id + metadata_dict[ResultsKey.TILE_ID] = tile_id + metadata_dict[ResultsKey.IMAGE_PATH] = image_paths + return metadata_dict + def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: - # WARNING: This is a dummy input 
until we figure out tiles coordinates retrieval in the next iteration. - # batch returns ['image', , , - # , , , - # , 'original_spatial_shape', 'slices', 'patch_size', 'num_patches', 'offset'] - bag_sizes = [tiles.shape[0] for tiles in batch[SlideKey.IMAGE]] - - results.update( - { - ResultsKey.SLIDE_ID: [ - [slide_id] * bag_sizes[i] for i, slide_id in enumerate(batch[SlideKey.SLIDE_ID]) - ], - ResultsKey.TILE_ID: [ - [f"{slide_id}_{tile_id}" for tile_id in range(bag_sizes[i])] - for i, slide_id in enumerate(batch[SlideKey.SLIDE_ID]) - ], - ResultsKey.IMAGE_PATH: [ - [img_path] * bag_sizes[i] for i, img_path in enumerate(batch[SlideKey.IMAGE_PATH]) - ], + import pdb; pdb.set_trace() + if all(key in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): + n_slides = len(batch[SlideKey.PATCH_LOCATION]) + metadata_dict = { + ResultsKey.TILE_TOP: [], + ResultsKey.TILE_BOTTOM: [], + ResultsKey.TILE_LEFT: [], + ResultsKey.TILE_RIGHT: [], + ResultsKey.SLIDE_ID: [], + ResultsKey.TILE_ID: [], + ResultsKey.IMAGE_PATH: [], } - ) - if all(key in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_SIZE]): - results.update( - { - ResultsKey.TILE_TOP: [ - [batch[SlideKey.OFFSET][i][0]] for i, _ in enumerate(batch[SlideKey.OFFSET]) - ], - ResultsKey.TILE_LEFT: [ - [batch[SlideKey.OFFSET][i][1]] for i, _ in enumerate(batch[SlideKey.OFFSET]) - ], - ResultsKey.TILE_RIGHT: [ - [batch[SlideKey.OFFSET][i][1] + batch[SlideKey.PATCH_SIZE][i][1]] - for i, _ in enumerate(batch[SlideKey.OFFSET]) - ], - ResultsKey.TILE_BOTTOM: [ - [batch[SlideKey.OFFSET][i][0] + batch[SlideKey.PATCH_SIZE][i][0]] - for i, _ in enumerate(batch[SlideKey.OFFSET]) - ], - - } - ) + results.update(metadata_dict) + # each slide can have a different number of patches + for i in range(n_slides): + updated_metadata_dict = self.compute_slide_metadata(batch, i, metadata_dict) + for key in metadata_dict.keys(): + results[key].append(updated_metadata_dict[key]) else: - 
rank_zero_warn(message="Offset and patch size not found in the batch, make sure to use RandGridPatch.") + rank_zero_warn(message= + "Offset, patch location or patch size are not found in the batch, make sure to use RandGridPatch." + ) diff --git a/hi-ml-histopathology/src/histopathology/utils/naming.py b/hi-ml-histopathology/src/histopathology/utils/naming.py index 6cbfe3841..f33d996c8 100644 --- a/hi-ml-histopathology/src/histopathology/utils/naming.py +++ b/hi-ml-histopathology/src/histopathology/utils/naming.py @@ -20,8 +20,8 @@ class SlideKey(str, Enum): FOREGROUND_THRESHOLD = 'foreground_threshold' METADATA = 'metadata' LOCATION = 'location' - PATCH_SIZE = WSIPatchKeys.SIZE # 'patch_size' - PATCH_LOCATION = WSIPatchKeys.LOCATION # 'patch_location' + PATCH_SIZE = WSIPatchKeys.SIZE.value # 'patch_size' + PATCH_LOCATION = WSIPatchKeys.LOCATION.value # 'patch_location' OFFSET = 'offset' SHAPE = 'original_spatial_shape' From 5c108857141f76ec508d7dfb848d3436a337d3da Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 14 Jun 2022 16:31:59 +0000 Subject: [PATCH 08/29] shape and type results aligned to Tiles --- .../src/histopathology/models/deepmil.py | 17 ++++++++--------- .../src/histopathology/utils/output_utils.py | 2 ++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 861796ad4..d5e265c08 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -337,8 +337,8 @@ def get_bag_label(labels: Tensor) -> Tensor: return labels @staticmethod - def get_empty_lists(shape: int) -> List: - return [None] * shape + def get_empty_lists(shape: int, n: int) -> List: + return [[None] * shape] * n @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: List) -> Tuple[int, int, int, int]: @@ -351,21 +351,21 @@ def 
get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: L return top, bottom, left, right @staticmethod - def expand_slide_constant_metadata(id, path, n_patches): + def expand_slide_constant_metadata(id: str, path: str, n_patches: int) -> Tuple[List, List, List]: """Duplicate metadata that is patch invariant to match the shape of other arrays""" - slide_id = id * n_patches - image_paths = path * n_patches + slide_id = [id] * n_patches + image_paths = [path] * n_patches tile_id = [f"{id}_{tile_id}" for tile_id in range(n_patches)] return slide_id, image_paths, tile_id def get_slide_patch_coordinates( self, slide_offset: List, patches_location: List, patch_size: List - ) -> List[List, List, List, List]: + ) -> Tuple[List, List, List, List]: """ computing absolute coordinates for all patches in a slide""" - top, bottom, left, right = self.get_empty_lists(len(patches_location)) + top, bottom, left, right = self.get_empty_lists(len(patches_location), 4) for i, location in enumerate(patches_location): top[i], bottom[i], left[i], right[i] = self.get_patch_coordinate(slide_offset, location, patch_size) - return top, bottom, left, right + return (top, bottom, left, right) def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) -> Dict: """compute patch-dependent and patch-invariante metadata for a single slide """ @@ -389,7 +389,6 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - return metadata_dict def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: - import pdb; pdb.set_trace() if all(key in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): n_slides = len(batch[SlideKey.PATCH_LOCATION]) metadata_dict = { diff --git a/hi-ml-histopathology/src/histopathology/utils/output_utils.py b/hi-ml-histopathology/src/histopathology/utils/output_utils.py index b13057efd..729a17e36 100644 --- 
a/hi-ml-histopathology/src/histopathology/utils/output_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/output_utils.py @@ -68,6 +68,8 @@ def normalize_dict_for_df(dict_old: Dict[ResultsKey, Any]) -> Dict[str, Any]: value = value.squeeze(0).cpu().numpy() if value.ndim == 0: value = np.full(bag_size, fill_value=value) + if isinstance(value, List) and isinstance(value[0], torch.Tensor): + value = [value[i].item() for i in range(len(value))] dict_new[key] = value elif key == ResultsKey.CLASS_PROBS: if isinstance(value, torch.Tensor): From 585c893df48771ab8808962c8df758ae7e56d318 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 14 Jun 2022 16:32:40 +0000 Subject: [PATCH 09/29] updatingg env to pin MONAI dev commit --- hi-ml-histopathology/environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index 709706d61..bdf5e7628 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -39,8 +39,8 @@ dependencies: - jupyter-client==6.1.5 - lightning-bolts==0.4.0 - mlflow==1.17.0 - # Branch containing transform with coordinates - - git+https://github.com/Project-MONAI/MONAI.git@dev + # commit of dev branch containing transform with coordinates + - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb - more-itertools==8.10.0 - mypy==0.931 - mypy-extensions==0.4.3 From 7970037ac1f45ed1bb370ec1d861f77e227f6f93 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 15 Jun 2022 13:22:46 +0000 Subject: [PATCH 10/29] heatmaps produced locally when tiling on the fly --- .../histopathology/configs/classification/DeepSMILEPanda.py | 2 +- hi-ml-histopathology/src/histopathology/utils/wsi_utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py 
b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py index f05990002..902ef2d13 100644 --- a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py +++ b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py @@ -134,7 +134,7 @@ def __init__(self, **kwargs: Any) -> None: # declared in DatasetParams: local_datasets=[Path("/tmp/datasets/PANDA")], azure_datasets=["PANDA"], - save_output_slides=False,) + save_output_slides=True,) default_kwargs.update(kwargs) super().__init__(**default_kwargs) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 96dcb09db..b883945f5 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -15,7 +15,7 @@ SlideKey.SLIDE_ID, SlideKey.MASK_PATH, WSIPatchKeys.COUNT, - SlideKey.PATCH_SIZE, #TODO: remove in case we want to allow patches of different sizes from the same slide + SlideKey.PATCH_SIZE, # TODO: remove in case we want to allow patches of different sizes from the same slide SlideKey.SHAPE, SlideKey.OFFSET ] @@ -39,6 +39,7 @@ def array_collate(batch: List) -> Any: constant_keys.append(key) tensor_keys = collate_keys + [SlideKey.LABEL] + print("collate keys") for i, patch_data in enumerate(batch): data = patch_data[0] for key in collate_keys: From 792385ece77b8b47660f0d01b6399c86d28e09c2 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Mon, 20 Jun 2022 13:30:57 +0000 Subject: [PATCH 11/29] problematic slides now skipped --- .../src/histopathology/models/deepmil.py | 18 ++++++++++ .../src/histopathology/utils/wsi_utils.py | 35 +++++++++++++++---- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index d5e265c08..9131fe66f 100644 --- 
a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -358,6 +358,22 @@ def expand_slide_constant_metadata(id: str, path: str, n_patches: int) -> Tuple[ tile_id = [f"{id}_{tile_id}" for tile_id in range(n_patches)] return slide_id, image_paths, tile_id + @staticmethod + def check_patch_location_format(batch): + faulty_slides_idx = [] + for i, locations in enumerate(batch['patch_location']): + for location in locations: + if len(location) != 3: + print(f'Slide {batch[SlideKey.SLIDE_ID][i]} ' + f'will be skipped as its patches contained unexpected values in patch_location {location}') + faulty_slides_idx.append(batch[SlideKey.SLIDE_ID][i]) + break + n = len(faulty_slides_idx) + if n > 0: + print(f'{n} slides will be skipped because somethign was wrong in the patch location') + import pdb; pdb.set_trace() + return faulty_slides_idx + def get_slide_patch_coordinates( self, slide_offset: List, patches_location: List, patch_size: List ) -> Tuple[List, List, List, List]: @@ -376,6 +392,8 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - id = batch[SlideKey.SLIDE_ID][index] path = batch[SlideKey.IMAGE_PATH][index] + self.check_patch_location_format(batch) + top, bottom, left, right = self.get_slide_patch_coordinates(offset, patches_location, patch_size) slide_id, image_paths, tile_id = self.expand_slide_constant_metadata(id, path, n_patches) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index b883945f5..8fb3e14db 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -21,6 +21,26 @@ ] +def check_patch_location_format(batch): + faulty_slides_idx = [] + for i, slide_data in enumerate(batch): + # TODO check dimension of faulty slide id here + for patch in slide_data: + #if patch[SlideKey.SLIDE_ID] == 
'8d5860e10e09ee25e066ee7fb699453d': + # print(patch[WSIPatchKeys.LOCATION]) + # print(len(patch[WSIPatchKeys.LOCATION])) + location = patch[WSIPatchKeys.LOCATION] + if len(location) < 3: + print(f'Slide {patch[SlideKey.SLIDE_ID]} ' + f'will be skipped as its patches contained unexpected values in patch_location {location}') + faulty_slides_idx.append(patch[SlideKey.SLIDE_ID]) + break + n = len(faulty_slides_idx) + if n > 0: + print(f'{n} slides will be skipped because somethign was wrong in the patch location') + return faulty_slides_idx + + def array_collate(batch: List) -> Any: """ Combine instances from a list of dicts into a single dict, by stacking arrays along first dim @@ -39,12 +59,15 @@ def array_collate(batch: List) -> Any: constant_keys.append(key) tensor_keys = collate_keys + [SlideKey.LABEL] - print("collate keys") + skip_idx = check_patch_location_format(batch) for i, patch_data in enumerate(batch): data = patch_data[0] - for key in collate_keys: - data[key] = np.array([ix[key] for ix in patch_data]) - for key in tensor_keys: - data[key] = torch.tensor(data[key]) - batch[i] = data + if data[SlideKey.SLIDE_ID] not in skip_idx: + for key in collate_keys: + # if not forcing a type, dtpe will be inferred as np.object in cases where the input image is + # anomalous (eg. nan values). This will raise an error when converting to tensor. 
+ data[key] = np.array([ix[key] for ix in patch_data]) + for key in tensor_keys: + data[key] = torch.tensor(data[key]) + batch[i] = data return multibag_collate(batch) From 85bd4ecbe376dca5219148193bd6a155f7735dab Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 21 Jun 2022 12:48:14 +0000 Subject: [PATCH 12/29] bug fix runs locally --- hi-ml-histopathology/.vscode/launch.json | 18 +++++++++++++++ .../src/histopathology/utils/wsi_utils.py | 22 +++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index d43e38510..6ccd0b155 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -57,6 +57,24 @@ ], "console": "integratedTerminal", }, + { + "name": "Python: Long Run SlidesPandaImageNetMIL locally", + "type": "python", + "request": "launch", + "justMyCode": false, + "program": "${workspaceFolder}/../hi-ml/src/health_ml/runner.py", + "args": [ + "--model=histopathology.SlidesPandaImageNetMIL", + "--crossval_count=0", + "--batch_size=50", + "--max_bag_size=10", + "--max_bag_size_inf=10", + "--num_top_slides=2", + "--num_top_tiles=2", + "--max_num_gpus=1", + ], + "console": "integratedTerminal", + }, { "name": "Python: Run TilesPandaImageNetMIL locally", "type": "python", diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 8fb3e14db..934b624b3 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -22,17 +22,16 @@ def check_patch_location_format(batch): + """ + check locations returned by transform have expected size [z, y, x] + """ faulty_slides_idx = [] - for i, slide_data in enumerate(batch): - # TODO check dimension of faulty slide id here + for slide_data in batch: for patch in slide_data: - #if patch[SlideKey.SLIDE_ID] == 
'8d5860e10e09ee25e066ee7fb699453d': - # print(patch[WSIPatchKeys.LOCATION]) - # print(len(patch[WSIPatchKeys.LOCATION])) location = patch[WSIPatchKeys.LOCATION] if len(location) < 3: print(f'Slide {patch[SlideKey.SLIDE_ID]} ' - f'will be skipped as its patches contained unexpected values in patch_location {location}') + f'will be skipped as its patches contained unexpected patch_location values: {location}') faulty_slides_idx.append(patch[SlideKey.SLIDE_ID]) break n = len(faulty_slides_idx) @@ -60,14 +59,15 @@ def array_collate(batch: List) -> Any: tensor_keys = collate_keys + [SlideKey.LABEL] skip_idx = check_patch_location_format(batch) - for i, patch_data in enumerate(batch): + new_batch: List[dict] = [] + for patch_data in batch: + # we assume all patches are dictionaries with the same keys data = patch_data[0] + # this is necesseary to overcome bug in RandGRidPatch, if one patch has faulty location the all slide is skipped if data[SlideKey.SLIDE_ID] not in skip_idx: for key in collate_keys: - # if not forcing a type, dtpe will be inferred as np.object in cases where the input image is - # anomalous (eg. nan values). This will raise an error when converting to tensor. 
data[key] = np.array([ix[key] for ix in patch_data]) for key in tensor_keys: data[key] = torch.tensor(data[key]) - batch[i] = data - return multibag_collate(batch) + new_batch.append(data) + return multibag_collate(new_batch) From 528194051bc59059772ed37a0fa4026e51c40b93 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 21 Jun 2022 14:22:05 +0000 Subject: [PATCH 13/29] reduce logging --- hi-ml-histopathology/src/histopathology/utils/wsi_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 934b624b3..3a0ab739c 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -30,13 +30,13 @@ def check_patch_location_format(batch): for patch in slide_data: location = patch[WSIPatchKeys.LOCATION] if len(location) < 3: - print(f'Slide {patch[SlideKey.SLIDE_ID]} ' - f'will be skipped as its patches contained unexpected patch_location values: {location}') + # print(f'Slide {patch[SlideKey.SLIDE_ID]} ' + # f'will be skipped as its patches contained unexpected patch_location values: {location}') faulty_slides_idx.append(patch[SlideKey.SLIDE_ID]) break n = len(faulty_slides_idx) if n > 0: - print(f'{n} slides will be skipped because somethign was wrong in the patch location') + logging.warning(f'{n} slides will be skipped because somethign was wrong in the patch location') return faulty_slides_idx From 2ea8c9481acb7da025d989d54cb2b487e2318c2b Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Mon, 27 Jun 2022 15:52:58 +0000 Subject: [PATCH 14/29] works locally skipping slide with problematic patches (1 in val on the test case) --- hi-ml-histopathology/.vscode/launch.json | 5 ++++ hi-ml-histopathology/environment.yml | 4 ++- .../src/histopathology/models/deepmil.py | 26 +++++++++---------- .../src/histopathology/utils/wsi_utils.py | 19 
++++++++------ 4 files changed, 31 insertions(+), 23 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index 6ccd0b155..748661ddb 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -72,6 +72,11 @@ "--num_top_slides=2", "--num_top_tiles=2", "--max_num_gpus=1", + "--max_epochs 2", + "--pl_limit_train_batches 20", + "--pl_limit_test_batches 20", + "--pl_limit_val_batches 20" + ], "console": "integratedTerminal", }, diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index bdf5e7628..3fd844709 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -40,7 +40,9 @@ dependencies: - lightning-bolts==0.4.0 - mlflow==1.17.0 # commit of dev branch containing transform with coordinates - - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb + # - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb + # commit of dev branch including latest fixed to GridPatch 22/06 + - git+https://github.com/Project-MONAI/MONAI.git@669bddf581201f994d1bcc0cb780854901605d9b - more-itertools==8.10.0 - mypy==0.931 - mypy-extensions==0.4.3 diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 9131fe66f..ffcc4c950 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -343,11 +343,11 @@ def get_empty_lists(shape: int, n: int) -> List: @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: List) -> Tuple[int, int, int, int]: """ computing absolute patch coordinate """ - # PATCH_LOCATION is expected to have shape [z, y, x] - here we assume 2d images - top = slide_offset[0] + patch_location[1] - bottom = slide_offset[0] + patch_location[1] + 
patch_size[0] - left = slide_offset[1] + patch_location[2] - right = slide_offset[1] + patch_location[2] + patch_size[1] + # PATCH_LOCATION is expected to have shape [y, x] + top = slide_offset[0] + patch_location[0] + bottom = slide_offset[0] + patch_location[0] + patch_size[0] + left = slide_offset[1] + patch_location[1] + right = slide_offset[1] + patch_location[1] + patch_size[1] return top, bottom, left, right @staticmethod @@ -361,17 +361,16 @@ def expand_slide_constant_metadata(id: str, path: str, n_patches: int) -> Tuple[ @staticmethod def check_patch_location_format(batch): faulty_slides_idx = [] - for i, locations in enumerate(batch['patch_location']): + for i, locations in enumerate(batch[SlideKey.PATCH_LOCATION]): for location in locations: - if len(location) != 3: + if len(location) != 2: print(f'Slide {batch[SlideKey.SLIDE_ID][i]} ' - f'will be skipped as its patches contained unexpected values in patch_location {location}') + f'will be skipped as its patches contained unexpected values in patch_location {location}') faulty_slides_idx.append(batch[SlideKey.SLIDE_ID][i]) break n = len(faulty_slides_idx) if n > 0: print(f'{n} slides will be skipped because somethign was wrong in the patch location') - import pdb; pdb.set_trace() return faulty_slides_idx def get_slide_patch_coordinates( @@ -392,7 +391,7 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - id = batch[SlideKey.SLIDE_ID][index] path = batch[SlideKey.IMAGE_PATH][index] - self.check_patch_location_format(batch) + # self.check_patch_location_format(batch) top, bottom, left, right = self.get_slide_patch_coordinates(offset, patches_location, patch_size) slide_id, image_paths, tile_id = self.expand_slide_constant_metadata(id, path, n_patches) @@ -408,7 +407,7 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: if all(key in batch.keys() for key in 
[SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): - n_slides = len(batch[SlideKey.PATCH_LOCATION]) + n_slides = len(batch[SlideKey.SLIDE_ID]) metadata_dict = { ResultsKey.TILE_TOP: [], ResultsKey.TILE_BOTTOM: [], @@ -425,6 +424,5 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> for key in metadata_dict.keys(): results[key].append(updated_metadata_dict[key]) else: - rank_zero_warn(message= - "Offset, patch location or patch size are not found in the batch, make sure to use RandGridPatch." - ) + rank_zero_warn(message="Offset, patch location or patch size are not found in the batch" + "make sure to use RandGridPatch.") diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 3a0ab739c..dac9f77b2 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -28,15 +28,14 @@ def check_patch_location_format(batch): faulty_slides_idx = [] for slide_data in batch: for patch in slide_data: - location = patch[WSIPatchKeys.LOCATION] - if len(location) < 3: - # print(f'Slide {patch[SlideKey.SLIDE_ID]} ' - # f'will be skipped as its patches contained unexpected patch_location values: {location}') + location = patch[SlideKey.PATCH_LOCATION] + if not isinstance(location[0], np.uint8): + # we assume the location is 2d [y, x] but MONAI sometimes returns [[0], [0]] instead faulty_slides_idx.append(patch[SlideKey.SLIDE_ID]) break n = len(faulty_slides_idx) if n > 0: - logging.warning(f'{n} slides will be skipped because somethign was wrong in the patch location') + logging.warning(f'{n} slides will be skipped because something was wrong in the patch location') return faulty_slides_idx @@ -63,11 +62,15 @@ def array_collate(batch: List) -> Any: for patch_data in batch: # we assume all patches are dictionaries with the same keys data = patch_data[0] - # this is necesseary to 
overcome bug in RandGRidPatch, if one patch has faulty location the all slide is skipped + # this is necessary to overcome bug in RandGRidPatch, if one patch has faulty location the all slide is skipped if data[SlideKey.SLIDE_ID] not in skip_idx: for key in collate_keys: - data[key] = np.array([ix[key] for ix in patch_data]) + if key == SlideKey.PATCH_LOCATION: + data[key] = np.array([ix[key] for ix in patch_data if type(ix[key][0]) == np.uint8]) + else: + data[key] = np.array([ix[key] for ix in patch_data]) for key in tensor_keys: data[key] = torch.tensor(data[key]) new_batch.append(data) - return multibag_collate(new_batch) + batch = new_batch + return multibag_collate(batch) From a720a28891671c613c204fd55e26f70fbe4c0d12 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 5 Jul 2022 11:22:26 +0000 Subject: [PATCH 15/29] issue with coordinnates being equal fixed --- .../histopathology/datamodules/base_module.py | 2 +- .../src/histopathology/models/deepmil.py | 18 +++++++++++------- .../src/histopathology/utils/output_utils.py | 8 +++++++- .../src/histopathology/utils/wsi_utils.py | 11 ++++++----- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index ca18b22d8..78dcdcc94 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -302,7 +302,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas sort_fn=self.filter_mode, pad_mode="constant", constant_values=self.background_val, - overlap=self.overlap, #type: ignore + overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, max_offset=max_offset, ) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index ffcc4c950..8d19764a3 100644 --- 
a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -3,6 +3,7 @@ # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. # ------------------------------------------------------------------------------------------ +from logging import raiseExceptions from typing import Callable, Dict, List, Optional, Sequence, Tuple from pytorch_lightning.utilities.warnings import rank_zero_warn import numpy as np @@ -338,7 +339,10 @@ def get_bag_label(labels: Tensor) -> Tensor: @staticmethod def get_empty_lists(shape: int, n: int) -> List: - return [[None] * shape] * n + ll = [] + for _ in range(n): + ll.append([None] * shape) + return ll @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: List) -> Tuple[int, int, int, int]: @@ -380,13 +384,13 @@ def get_slide_patch_coordinates( top, bottom, left, right = self.get_empty_lists(len(patches_location), 4) for i, location in enumerate(patches_location): top[i], bottom[i], left[i], right[i] = self.get_patch_coordinate(slide_offset, location, patch_size) - return (top, bottom, left, right) + return top, bottom, left, right def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) -> Dict: """compute patch-dependent and patch-invariante metadata for a single slide """ - offset = batch[SlideKey.OFFSET][index] - patches_location = batch[SlideKey.PATCH_LOCATION][index] - patch_size = batch[SlideKey.PATCH_SIZE][index] + offset = batch[SlideKey.OFFSET.value][index] + patches_location = batch[SlideKey.PATCH_LOCATION.value][index] + patch_size = batch[SlideKey.PATCH_SIZE.value][index] n_patches = len(patches_location) id = batch[SlideKey.SLIDE_ID][index] path = batch[SlideKey.IMAGE_PATH][index] @@ -406,7 +410,7 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - return metadata_dict def update_results_with_data_specific_info(self, batch: 
Dict, results: Dict) -> None: - if all(key in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): + if all(key.value in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): n_slides = len(batch[SlideKey.SLIDE_ID]) metadata_dict = { ResultsKey.TILE_TOP: [], @@ -425,4 +429,4 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> results[key].append(updated_metadata_dict[key]) else: rank_zero_warn(message="Offset, patch location or patch size are not found in the batch" - "make sure to use RandGridPatch.") + "make sure to use RandGridPatch.") diff --git a/hi-ml-histopathology/src/histopathology/utils/output_utils.py b/hi-ml-histopathology/src/histopathology/utils/output_utils.py index 729a17e36..f8fee668e 100644 --- a/hi-ml-histopathology/src/histopathology/utils/output_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/output_utils.py @@ -132,11 +132,17 @@ def save_outputs_csv(results: ResultsType, outputs_dir: Path) -> None: # Collect the list of dictionaries in a list of pandas dataframe and save df_list = [] + skipped_slides = 0 for slide_dict in list_slide_dicts: slide_dict = normalize_dict_for_df(slide_dict) # type: ignore - df_list.append(pd.DataFrame.from_dict(slide_dict)) + try: + df_list.append(pd.DataFrame.from_dict(slide_dict)) + except ValueError: + skipped_slides += 1 + logging.warning(f"something wrong in the dimension of slide {slide_dict[ResultsKey.SLIDE_ID][0]}") df = pd.concat(df_list, ignore_index=True) df.to_csv(csv_filename, mode='w+', header=True) + logging.warning(f"{skipped_slides} slides have not been included in the ouputs because of issues with the outputs") def save_features(results: ResultsType, outputs_dir: Path) -> None: diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index dac9f77b2..110380b3b 100644 --- 
a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -57,7 +57,8 @@ def array_collate(batch: List) -> Any: constant_keys.append(key) tensor_keys = collate_keys + [SlideKey.LABEL] - skip_idx = check_patch_location_format(batch) + # skip_idx = check_patch_location_format(batch) + skip_idx = [] new_batch: List[dict] = [] for patch_data in batch: # we assume all patches are dictionaries with the same keys @@ -65,10 +66,10 @@ def array_collate(batch: List) -> Any: # this is necessary to overcome bug in RandGRidPatch, if one patch has faulty location the all slide is skipped if data[SlideKey.SLIDE_ID] not in skip_idx: for key in collate_keys: - if key == SlideKey.PATCH_LOCATION: - data[key] = np.array([ix[key] for ix in patch_data if type(ix[key][0]) == np.uint8]) - else: - data[key] = np.array([ix[key] for ix in patch_data]) + # if key == SlideKey.PATCH_LOCATION: + # data[key] = np.array([ix[key] for ix in patch_data if type(ix[key][0]) == np.int16]) + #else: + data[key] = np.array([ix[key] for ix in patch_data]) for key in tensor_keys: data[key] = torch.tensor(data[key]) new_batch.append(data) From a99632359b4f01c59ac62b1ebd8bf05dc8f0eba0 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 5 Jul 2022 12:25:50 +0000 Subject: [PATCH 16/29] cleaning up checks no longer needed --- .../src/histopathology/models/deepmil.py | 5 +-- .../src/histopathology/utils/wsi_utils.py | 37 +++---------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 8d19764a3..d57a5f86a 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -364,6 +364,7 @@ def expand_slide_constant_metadata(id: str, path: str, n_patches: int) -> Tuple[ @staticmethod def check_patch_location_format(batch): + 
"""Workaround for bug in MONAI that returns not consistent location""" faulty_slides_idx = [] for i, locations in enumerate(batch[SlideKey.PATCH_LOCATION]): for location in locations: @@ -374,7 +375,7 @@ def check_patch_location_format(batch): break n = len(faulty_slides_idx) if n > 0: - print(f'{n} slides will be skipped because somethign was wrong in the patch location') + print(f'{n} slides will be skipped because something was wrong in the patch location') return faulty_slides_idx def get_slide_patch_coordinates( @@ -395,8 +396,6 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - id = batch[SlideKey.SLIDE_ID][index] path = batch[SlideKey.IMAGE_PATH][index] - # self.check_patch_location_format(batch) - top, bottom, left, right = self.get_slide_patch_coordinates(offset, patches_location, patch_size) slide_id, image_paths, tile_id = self.expand_slide_constant_metadata(id, path, n_patches) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 110380b3b..d2724b39b 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -21,24 +21,6 @@ ] -def check_patch_location_format(batch): - """ - check locations returned by transform have expected size [z, y, x] - """ - faulty_slides_idx = [] - for slide_data in batch: - for patch in slide_data: - location = patch[SlideKey.PATCH_LOCATION] - if not isinstance(location[0], np.uint8): - # we assume the location is 2d [y, x] but MONAI sometimes returns [[0], [0]] instead - faulty_slides_idx.append(patch[SlideKey.SLIDE_ID]) - break - n = len(faulty_slides_idx) - if n > 0: - logging.warning(f'{n} slides will be skipped because something was wrong in the patch location') - return faulty_slides_idx - - def array_collate(batch: List) -> Any: """ Combine instances from a list of dicts into a single dict, by stacking arrays along first dim 
@@ -57,21 +39,14 @@ def array_collate(batch: List) -> Any: constant_keys.append(key) tensor_keys = collate_keys + [SlideKey.LABEL] - # skip_idx = check_patch_location_format(batch) - skip_idx = [] new_batch: List[dict] = [] for patch_data in batch: # we assume all patches are dictionaries with the same keys data = patch_data[0] - # this is necessary to overcome bug in RandGRidPatch, if one patch has faulty location the all slide is skipped - if data[SlideKey.SLIDE_ID] not in skip_idx: - for key in collate_keys: - # if key == SlideKey.PATCH_LOCATION: - # data[key] = np.array([ix[key] for ix in patch_data if type(ix[key][0]) == np.int16]) - #else: - data[key] = np.array([ix[key] for ix in patch_data]) - for key in tensor_keys: - data[key] = torch.tensor(data[key]) - new_batch.append(data) - batch = new_batch + for key in collate_keys: + data[key] = np.array([ix[key] for ix in patch_data]) + for key in tensor_keys: + data[key] = torch.tensor(data[key]) + new_batch.append(data) + batch = new_batch return multibag_collate(batch) From 7550cae26950112ce59b8a045160f5a362349e09 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 5 Jul 2022 12:39:29 +0000 Subject: [PATCH 17/29] leftover from merge --- hi-ml-histopathology/.vscode/launch.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index bf1d334b8..1654605fc 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -48,13 +48,8 @@ "--pl_fast_dev_run=10", "--crossval_count=0", "--batch_size=2", -<<<<<<< HEAD "--max_bag_size=5", "--max_bag_size_inf=5", -======= - "--max_bag_size=4", - "--max_bag_size_inf=4", ->>>>>>> main "--num_top_slides=2", "--num_top_tiles=2", "--max_num_gpus=1", From e989b4f4aff86eab2b671289218f766e8d3d3016 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Jul 2022 12:40:53 +0000 Subject: 
[PATCH 18/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../histopathology/datamodules/base_module.py | 35 ++++++++++--------- .../src/histopathology/models/deepmil.py | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 043191217..e299b92af 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -106,6 +106,7 @@ def val_dataloader(self) -> DataLoader: def test_dataloader(self) -> DataLoader: return self._get_dataloader(self.test_dataset, shuffle=False, stage=ModelKey.TEST, **self.dataloader_kwargs) + class TilesDataModule(HistoDataModule[TilesDataset]): """Base class to load the tiles of a dataset as train, val, test sets""" @@ -287,25 +288,25 @@ def __init__( def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Dataset: load_image_transform = LoadImaged( - keys=slides_dataset.IMAGE_COLUMN, - reader=WSIReader, # type: ignore - backend="cuCIM", - dtype=np.uint8, - level=self.level, - image_only=True, - ) + keys=slides_dataset.IMAGE_COLUMN, + reader=WSIReader, # type: ignore + backend="cuCIM", + dtype=np.uint8, + level=self.level, + image_only=True, + ) max_offset = None if (self.random_offset and stage == ModelKey.TRAIN) else 0 random_grid_transform = RandGridPatchd( - keys=[slides_dataset.IMAGE_COLUMN], - patch_size=[self.tile_size, self.tile_size], # type: ignore - num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, - sort_fn=self.filter_mode, - pad_mode="constant", - constant_values=self.background_val, - overlap=self.overlap, # type: ignore - threshold=self.intensity_threshold, - max_offset=max_offset, - ) + keys=[slides_dataset.IMAGE_COLUMN], + patch_size=[self.tile_size, self.tile_size], 
# type: ignore + num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, + sort_fn=self.filter_mode, + pad_mode="constant", + constant_values=self.background_val, + overlap=self.overlap, # type: ignore + threshold=self.intensity_threshold, + max_offset=max_offset, + ) base_transform = Compose([load_image_transform, random_grid_transform]) if self.transforms_dict and self.transforms_dict[stage]: diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 0b78bd856..20f500d0a 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -445,4 +445,4 @@ def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> results[key].append(updated_metadata_dict[key]) else: rank_zero_warn(message="Offset, patch location or patch size are not found in the batch" - "make sure to use RandGridPatch.") + "make sure to use RandGridPatch.") From 73f0798a1a1a238ae9f138a9f4a2150141e65297 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 5 Jul 2022 12:51:44 +0000 Subject: [PATCH 19/29] flake8 fixes --- .../src/histopathology/datamodules/base_module.py | 6 +++--- hi-ml-histopathology/src/histopathology/models/deepmil.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 043191217..c6c511b25 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -22,7 +22,7 @@ from monai.transforms.compose import Compose from monai.transforms.io.dictionary import LoadImaged -from monai.transforms import RandGridPatchd, GridPatchd +from monai.transforms import RandGridPatchd from monai.data.image_reader import WSIReader _SlidesOrTilesDataset = 
TypeVar('_SlidesOrTilesDataset', SlidesDataset, TilesDataset) @@ -293,7 +293,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas dtype=np.uint8, level=self.level, image_only=True, - ) + ) max_offset = None if (self.random_offset and stage == ModelKey.TRAIN) else 0 random_grid_transform = RandGridPatchd( keys=[slides_dataset.IMAGE_COLUMN], @@ -305,7 +305,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, max_offset=max_offset, - ) + ) base_transform = Compose([load_image_transform, random_grid_transform]) if self.transforms_dict and self.transforms_dict[stage]: diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 0b78bd856..0ce4c7f7c 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -3,10 +3,8 @@ # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. 
# ------------------------------------------------------------------------------------------ -from logging import raiseExceptions from typing import Callable, Dict, List, Optional, Sequence, Tuple from pytorch_lightning.utilities.warnings import rank_zero_warn -import numpy as np import torch from pytorch_lightning import LightningModule From 73b044fd4021682b2e57a3c2147035d153f2db3e Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Tue, 5 Jul 2022 17:35:35 +0000 Subject: [PATCH 20/29] addressing some PR feedback, thanks @Kenza --- hi-ml-histopathology/.vscode/launch.json | 23 ------------------- hi-ml-histopathology/environment.yml | 4 +++- .../histopathology/datamodules/base_module.py | 20 ++++++++-------- 3 files changed, 12 insertions(+), 35 deletions(-) diff --git a/hi-ml-histopathology/.vscode/launch.json b/hi-ml-histopathology/.vscode/launch.json index 1654605fc..140e0d252 100644 --- a/hi-ml-histopathology/.vscode/launch.json +++ b/hi-ml-histopathology/.vscode/launch.json @@ -56,29 +56,6 @@ ], "console": "integratedTerminal", }, - { - "name": "Python: Long Run SlidesPandaImageNetMIL locally", - "type": "python", - "request": "launch", - "justMyCode": false, - "program": "${workspaceFolder}/../hi-ml/src/health_ml/runner.py", - "args": [ - "--model=histopathology.SlidesPandaImageNetMIL", - "--crossval_count=0", - "--batch_size=50", - "--max_bag_size=10", - "--max_bag_size_inf=10", - "--num_top_slides=2", - "--num_top_tiles=2", - "--max_num_gpus=1", - "--max_epochs 2", - "--pl_limit_train_batches 20", - "--pl_limit_test_batches 20", - "--pl_limit_val_batches 20" - - ], - "console": "integratedTerminal", - }, { "name": "Python: Run TilesPandaImageNetMIL locally", "type": "python", diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index 27848013a..9598c800a 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -46,7 +46,9 @@ dependencies: # commit of dev branch containing transform 
with coordinates # - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb # commit of dev branch including latest fixed to GridPatch 22/06 - - git+https://github.com/Project-MONAI/MONAI.git@669bddf581201f994d1bcc0cb780854901605d9b + # - git+https://github.com/Project-MONAI/MONAI.git@669bddf581201f994d1bcc0cb780854901605d9b + # commit of fork from dev branch that includes latest fixe to GridPatch 05/07 + - git+https://github.com/vale-salvatelli/Project-MONAI/MONAI.git@4a699513d177d9c857de6bdae43045668df0ce7e - more-itertools==8.10.0 - mypy==0.961 - mypy-extensions==0.4.3 diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index dcbb9dadd..0c035850a 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -245,13 +245,12 @@ def __init__( self, level: Optional[int] = 1, tile_size: Optional[int] = 224, - step: Optional[int] = None, random_offset: Optional[bool] = True, - pad_full: Optional[bool] = False, background_val: Optional[int] = 255, - filter_mode: Optional[str] = "min", + filter_mode: Optional[str] = "None", overlap: Optional[float] = 0, intensity_threshold: Optional[float] = 0, + pad_mode: Optional[str] = "constant", **kwargs: Any, ) -> None: """ @@ -263,19 +262,17 @@ def __init__( defaults to True. This param is passed to TileOnGridd monai transform for tiling on the fly. :param background_val: the background constant to ignore background tiles (e.g. 255 for white background), defaults to 255. This param is passed to TileOnGridd monai transform for tiling on the fly. - :param filter_mode: mode must be in ["min", "max", "random"]. 
If total number of tiles is greater than - tile_count, then sort by intensity sum, and take the smallest (for min), largest (for max) or random (for - random) subset, defaults to "min" (which assumes background is high value). This param is passed to - monai transform for tiling on the fly. - : param overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + :param filter_mode: when `num_patches` is provided, it determines if keep patches with highest values (`"max"`), + lowest values (`"min"`), or in their default order (`None`). Default to None. + :param overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. - : param intensity_threshold: a value to keep only the patches whose sum of intensities are less than the + :param intensity_threshold: a value to keep only the patches whose sum of intensities are less than the threshold. Defaults to no filtering. + :pad_mode: refer to NumpyPadMode and PytorchPadMode. If None, no padding will be applied. 
""" super().__init__(**kwargs) self.level = level self.tile_size = tile_size - self.step = step self.random_offset = random_offset self.background_val = background_val self.filter_mode = filter_mode @@ -285,6 +282,7 @@ def __init__( self.max_bag_size_inf = None if self.max_bag_size_inf == 0 else self.max_bag_size_inf # type: ignore self.overlap = overlap self.intensity_threshold = intensity_threshold + self.pad_mode = pad_mode def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Dataset: load_image_transform = LoadImaged( @@ -301,7 +299,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas patch_size=[self.tile_size, self.tile_size], # type: ignore num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, sort_fn=self.filter_mode, - pad_mode="constant", + pad_mode=self.pad_mode, constant_values=self.background_val, overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, From 58c74ec58ead6bfbdbd4e001ee48cbcae02111a8 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 17:04:43 +0000 Subject: [PATCH 21/29] more changes --- hi-ml-histopathology/environment.yml | 4 ++-- .../histopathology/configs/classification/DeepSMILEPanda.py | 2 -- .../src/histopathology/datamodules/base_module.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index 9598c800a..79d124934 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -47,8 +47,8 @@ dependencies: # - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb # commit of dev branch including latest fixed to GridPatch 22/06 # - git+https://github.com/Project-MONAI/MONAI.git@669bddf581201f994d1bcc0cb780854901605d9b - # commit of fork from dev branch that includes latest fixe to GridPatch 05/07 - - 
git+https://github.com/vale-salvatelli/Project-MONAI/MONAI.git@4a699513d177d9c857de6bdae43045668df0ce7e + # commit of fork from dev branch that includes latest fix to GridPatch 05/07 + - git+https://github.com/vale-salvatelli/MONAI.git@2e552df599b520064fa4418c3cac07376ae301b8 - more-itertools==8.10.0 - mypy==0.961 - mypy-extensions==0.4.3 diff --git a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py index 162f1755f..b10e0a90b 100644 --- a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py +++ b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py @@ -162,10 +162,8 @@ def get_data_module(self) -> PandaSlidesDataModule: max_bag_size=self.max_bag_size, max_bag_size_inf=self.max_bag_size_inf, tile_size=self.tile_size, - step=self.step, random_offset=self.random_offset, seed=self.get_effective_random_seed(), - pad_full=self.pad_full, background_val=self.background_val, filter_mode=self.filter_mode, transforms_dict=self.get_transforms_dict(PandaDataset.IMAGE_COLUMN), diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 0c035850a..79a3689c6 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -247,7 +247,7 @@ def __init__( tile_size: Optional[int] = 224, random_offset: Optional[bool] = True, background_val: Optional[int] = 255, - filter_mode: Optional[str] = "None", + filter_mode: Optional[str] = "max", overlap: Optional[float] = 0, intensity_threshold: Optional[float] = 0, pad_mode: Optional[str] = "constant", From 87e693f5f5bae17ee131b87390596237e13a77bb Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 18:46:34 +0000 Subject: [PATCH 22/29] more feedback implemented --- 
.../configs/classification/DeepSMILEPanda.py | 4 ++ .../histopathology/datamodules/base_module.py | 41 +++++++++++++------ .../src/histopathology/models/deepmil.py | 35 +++++----------- .../src/histopathology/utils/naming.py | 4 +- .../src/histopathology/utils/wsi_utils.py | 2 +- 5 files changed, 46 insertions(+), 40 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py index b61aa577a..1a31c4d87 100644 --- a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py +++ b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py @@ -172,6 +172,10 @@ def get_data_module(self) -> PandaSlidesDataModule: def get_slides_dataset(self) -> PandaDataset: return PandaDataset(root=self.local_datasets[0]) # type: ignore + def get_test_plot_options(self) -> Set[PlotOption]: + plot_options = super().get_test_plot_options() + plot_options.add(PlotOption.SLIDE_THUMBNAIL_HEATMAP) + return plot_options class SlidesPandaImageNetMIL(DeepSMILESlidesPanda): def __init__(self, **kwargs: Any) -> None: diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 79a3689c6..44e7f2b47 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -22,7 +22,7 @@ from monai.transforms.compose import Compose from monai.transforms.io.dictionary import LoadImaged -from monai.transforms import RandGridPatchd +from monai.transforms import RandGridPatchd, GridPatchd from monai.data.image_reader import WSIReader _SlidesOrTilesDataset = TypeVar('_SlidesOrTilesDataset', SlidesDataset, TilesDataset) @@ -294,18 +294,33 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas image_only=True, ) max_offset = None if 
(self.random_offset and stage == ModelKey.TRAIN) else 0 - random_grid_transform = RandGridPatchd( - keys=[slides_dataset.IMAGE_COLUMN], - patch_size=[self.tile_size, self.tile_size], # type: ignore - num_patches=self.max_bag_size if stage == ModelKey.TRAIN else self.max_bag_size_inf, - sort_fn=self.filter_mode, - pad_mode=self.pad_mode, - constant_values=self.background_val, - overlap=self.overlap, # type: ignore - threshold=self.intensity_threshold, - max_offset=max_offset, - ) - base_transform = Compose([load_image_transform, random_grid_transform]) + + if stage != ModelKey.TRAIN: + grid_transform = RandGridPatchd( + keys=[slides_dataset.IMAGE_COLUMN], + patch_size=[self.tile_size, self.tile_size], # type: ignore + num_patches=self.max_bag_size, + sort_fn=self.filter_mode, + pad_mode=self.pad_mode, # type: ignore + constant_values=self.background_val, + overlap=self.overlap, # type: ignore + threshold=self.intensity_threshold, + max_offset=max_offset, + ) + else: + grid_transform = GridPatchd( + keys=[slides_dataset.IMAGE_COLUMN], + patch_size=[self.tile_size, self.tile_size], # type: ignore + num_patches=self.max_bag_size_inf, + sort_fn=self.filter_mode, + pad_mode=self.pad_mode, # type: ignore + constant_values=self.background_val, + overlap=self.overlap, # type: ignore + threshold=self.intensity_threshold, + max_offset=max_offset, + ) + + base_transform = Compose([load_image_transform, grid_transform]) if self.transforms_dict and self.transforms_dict[stage]: transforms = Compose([base_transform, self.transforms_dict[stage]]).flatten() # type: ignore diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index d5be5a99f..06ecb574c 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -356,7 +356,8 @@ def get_empty_lists(shape: int, n: int) -> List: return ll @staticmethod - def 
get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: List) -> Tuple[int, int, int, int]: + def get_patch_coordinate(slide_offset: List, patch_location: List[int], patch_size: List[int] + ) -> Tuple[int, int, int, int]: """ computing absolute patch coordinate """ # PATCH_LOCATION is expected to have shape [y, x] top = slide_offset[0] + patch_location[0] @@ -366,32 +367,16 @@ def get_patch_coordinate(slide_offset: List, patch_location: List, patch_size: L return top, bottom, left, right @staticmethod - def expand_slide_constant_metadata(id: str, path: str, n_patches: int) -> Tuple[List, List, List]: + def expand_slide_constant_metadata(id: str, path: str, n_patches: int, top: List[int], + bottom: List[int], left: List[int], right: List[int]) -> Tuple[List, List, List]: """Duplicate metadata that is patch invariant to match the shape of other arrays""" slide_id = [id] * n_patches image_paths = [path] * n_patches - tile_id = [f"{id}_{tile_id}" for tile_id in range(n_patches)] + tile_id = [f"{id}_left_{left[i]}_top_{top[i]}_right_{right[i]}_bottom_{bottom[i]}" for i in range(n_patches)] return slide_id, image_paths, tile_id - @staticmethod - def check_patch_location_format(batch): - """Workaround for bug in MONAI that returns not consistent location""" - faulty_slides_idx = [] - for i, locations in enumerate(batch[SlideKey.PATCH_LOCATION]): - for location in locations: - if len(location) != 2: - print(f'Slide {batch[SlideKey.SLIDE_ID][i]} ' - f'will be skipped as its patches contained unexpected values in patch_location {location}') - faulty_slides_idx.append(batch[SlideKey.SLIDE_ID][i]) - break - n = len(faulty_slides_idx) - if n > 0: - print(f'{n} slides will be skipped because something was wrong in the patch location') - return faulty_slides_idx - - def get_slide_patch_coordinates( - self, slide_offset: List, patches_location: List, patch_size: List - ) -> Tuple[List, List, List, List]: + def get_slide_patch_coordinates(self, slide_offset: 
List, patches_location: List, patch_size: List + ) -> Tuple[List, List, List, List]: """ computing absolute coordinates for all patches in a slide""" top, bottom, left, right = self.get_empty_lists(len(patches_location), 4) for i, location in enumerate(patches_location): @@ -408,7 +393,9 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - path = batch[SlideKey.IMAGE_PATH][index] top, bottom, left, right = self.get_slide_patch_coordinates(offset, patches_location, patch_size) - slide_id, image_paths, tile_id = self.expand_slide_constant_metadata(id, path, n_patches) + slide_id, image_paths, tile_id = self.expand_slide_constant_metadata( + id, path, n_patches, top, bottom, left, right + ) metadata_dict[ResultsKey.TILE_TOP] = top metadata_dict[ResultsKey.TILE_BOTTOM] = bottom @@ -420,7 +407,7 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - return metadata_dict def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: - if all(key.value in batch.keys() for key in [SlideKey.OFFSET, SlideKey.PATCH_LOCATION, SlideKey.PATCH_SIZE]): + if all(key.value in batch.keys() for key in [SlideKey.OFFSET, SlideKey.TILE_LOCATION, SlideKey.TILE_SIZE]): n_slides = len(batch[SlideKey.SLIDE_ID]) metadata_dict = { ResultsKey.TILE_TOP: [], diff --git a/hi-ml-histopathology/src/histopathology/utils/naming.py b/hi-ml-histopathology/src/histopathology/utils/naming.py index 1531380b1..987569fb1 100644 --- a/hi-ml-histopathology/src/histopathology/utils/naming.py +++ b/hi-ml-histopathology/src/histopathology/utils/naming.py @@ -20,8 +20,8 @@ class SlideKey(str, Enum): FOREGROUND_THRESHOLD = 'foreground_threshold' METADATA = 'metadata' LOCATION = 'location' - PATCH_SIZE = WSIPatchKeys.SIZE.value # 'patch_size' - PATCH_LOCATION = WSIPatchKeys.LOCATION.value # 'patch_location' + TILE_SIZE = WSIPatchKeys.SIZE.value # 'patch_size' + TILE_LOCATION = WSIPatchKeys.LOCATION.value # 'patch_location' OFFSET = 
'offset' SHAPE = 'original_spatial_shape' diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index d2724b39b..98b0ac025 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -15,7 +15,7 @@ SlideKey.SLIDE_ID, SlideKey.MASK_PATH, WSIPatchKeys.COUNT, - SlideKey.PATCH_SIZE, # TODO: remove in case we want to allow patches of different sizes from the same slide + SlideKey.TILE_SIZE, # TODO: remove in case we want to allow patches of different sizes from the same slide SlideKey.SHAPE, SlideKey.OFFSET ] From bf16cb02928086f295adb49768de30b19b9ff861 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Jul 2022 18:48:11 +0000 Subject: [PATCH 23/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../histopathology/configs/classification/DeepSMILEPanda.py | 1 + .../src/histopathology/datamodules/base_module.py | 4 ++-- hi-ml-histopathology/src/histopathology/models/deepmil.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py index 1a31c4d87..15772b94c 100644 --- a/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py +++ b/hi-ml-histopathology/src/histopathology/configs/classification/DeepSMILEPanda.py @@ -177,6 +177,7 @@ def get_test_plot_options(self) -> Set[PlotOption]: plot_options.add(PlotOption.SLIDE_THUMBNAIL_HEATMAP) return plot_options + class SlidesPandaImageNetMIL(DeepSMILESlidesPanda): def __init__(self, **kwargs: Any) -> None: super().__init__(encoder_type=ImageNetEncoder.__name__, **kwargs) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py 
b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 44e7f2b47..d9eb11a75 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -301,7 +301,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas patch_size=[self.tile_size, self.tile_size], # type: ignore num_patches=self.max_bag_size, sort_fn=self.filter_mode, - pad_mode=self.pad_mode, # type: ignore + pad_mode=self.pad_mode, # type: ignore constant_values=self.background_val, overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, @@ -313,7 +313,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas patch_size=[self.tile_size, self.tile_size], # type: ignore num_patches=self.max_bag_size_inf, sort_fn=self.filter_mode, - pad_mode=self.pad_mode, # type: ignore + pad_mode=self.pad_mode, # type: ignore constant_values=self.background_val, overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 06ecb574c..67bf80640 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -357,7 +357,7 @@ def get_empty_lists(shape: int, n: int) -> List: @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List[int], patch_size: List[int] - ) -> Tuple[int, int, int, int]: + ) -> Tuple[int, int, int, int]: """ computing absolute patch coordinate """ # PATCH_LOCATION is expected to have shape [y, x] top = slide_offset[0] + patch_location[0] From b65659904b1bf5c1b6fe50453fdd580f55ff9b0d Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 21:56:45 +0000 Subject: [PATCH 24/29] fixing current test plus update env --- hi-ml-histopathology/environment.yml | 4 ++-- 
.../testhisto/testhisto/utils/test_wsi_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hi-ml-histopathology/environment.yml b/hi-ml-histopathology/environment.yml index 79d124934..09dbcfc5c 100644 --- a/hi-ml-histopathology/environment.yml +++ b/hi-ml-histopathology/environment.yml @@ -47,8 +47,8 @@ dependencies: # - git+https://github.com/Project-MONAI/MONAI.git@df4a7d72e1d231b898f88d92cf981721c49ceaeb # commit of dev branch including latest fixed to GridPatch 22/06 # - git+https://github.com/Project-MONAI/MONAI.git@669bddf581201f994d1bcc0cb780854901605d9b - # commit of fork from dev branch that includes latest fix to GridPatch 05/07 - - git+https://github.com/vale-salvatelli/MONAI.git@2e552df599b520064fa4418c3cac07376ae301b8 + # commit of dev branch that includes latest fix to GridPatch 06/07 + - git+https://github.com/Project-MONAI/MONAI.git@4ddd2bc3870a86fb0a300c20e680de48886bbfc1 - more-itertools==8.10.0 - mypy==0.961 - mypy-extensions==0.4.3 diff --git a/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py b/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py index 779b56834..537842a12 100644 --- a/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py +++ b/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py @@ -5,7 +5,7 @@ from typing import Any, Dict, List from typing import Sequence from histopathology.utils.naming import SlideKey -from histopathology.utils.wsi_utils import image_collate +from histopathology.utils.wsi_utils import array_collate from torch.utils.data import Dataset From aa3e6f1680f6255e530d04ebac7b91efe698dd65 Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 22:03:51 +0000 Subject: [PATCH 25/29] fix flake8 --- .../src/histopathology/datamodules/base_module.py | 4 ++-- .../testhisto/testhisto/utils/test_wsi_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index d9eb11a75..19c021eb6 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -262,8 +262,8 @@ def __init__( defaults to True. This param is passed to TileOnGridd monai transform for tiling on the fly. :param background_val: the background constant to ignore background tiles (e.g. 255 for white background), defaults to 255. This param is passed to TileOnGridd monai transform for tiling on the fly. - :param filter_mode: when `num_patches` is provided, it determines if keep patches with highest values (`"max"`), - lowest values (`"min"`), or in their default order (`None`). Default to None. + :param filter_mode: when `num_patches` is provided, it determines if keep patches with highest values + (`"max"`), lowest values (`"min"`), or in their default order (`None`). Default to None. :param overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. 
:param intensity_threshold: a value to keep only the patches whose sum of intensities are less than the diff --git a/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py b/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py index 537842a12..d56a03fb5 100644 --- a/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py +++ b/hi-ml-histopathology/testhisto/testhisto/utils/test_wsi_utils.py @@ -39,7 +39,7 @@ def __getitem__(self, index: int) -> List[Dict[SlideKey, Any]]: @pytest.mark.parametrize("random_n_tiles", [False, True]) -def test_image_collate(random_n_tiles: bool) -> None: +def test_array_collate(random_n_tiles: bool) -> None: # random_n_tiles accounts for both train and inference settings where the number of tiles is fixed (during # training) and None during inference (validation and test) dataset = MockTiledWSIDataset(n_tiles=20, @@ -51,7 +51,7 @@ def test_image_collate(random_n_tiles: bool) -> None: batch_size = 5 samples_list = [dataset[idx] for idx in range(batch_size)] - batch: dict = image_collate(samples_list) + batch: dict = array_collate(samples_list) assert isinstance(batch, Dict) assert batch.keys() == samples_list[0].keys() # type: ignore From 4226ba371588fcefde52792fc7b21d4ea084159b Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 22:16:16 +0000 Subject: [PATCH 26/29] minor fixes --- .../src/histopathology/models/deepmil.py | 10 +++++----- .../src/histopathology/utils/wsi_utils.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 67bf80640..4c9e3f176 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -3,7 +3,7 @@ # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. 
# ------------------------------------------------------------------------------------------ import torch -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union from pytorch_lightning.utilities.warnings import rank_zero_warn from pathlib import Path @@ -357,7 +357,7 @@ def get_empty_lists(shape: int, n: int) -> List: @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List[int], patch_size: List[int] - ) -> Tuple[int, int, int, int]: + ) -> Tuple[int, int, int, int]: """ computing absolute patch coordinate """ # PATCH_LOCATION is expected to have shape [y, x] top = slide_offset[0] + patch_location[0] @@ -386,8 +386,8 @@ def get_slide_patch_coordinates(self, slide_offset: List, patches_location: List def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) -> Dict: """compute patch-dependent and patch-invariante metadata for a single slide """ offset = batch[SlideKey.OFFSET.value][index] - patches_location = batch[SlideKey.PATCH_LOCATION.value][index] - patch_size = batch[SlideKey.PATCH_SIZE.value][index] + patches_location = batch[SlideKey.TILE_LOCATION.value][index] + patch_size = batch[SlideKey.TILE_SIZE.value][index] n_patches = len(patches_location) id = batch[SlideKey.SLIDE_ID][index] path = batch[SlideKey.IMAGE_PATH][index] @@ -409,7 +409,7 @@ def compute_slide_metadata(self, batch: Dict, index: int, metadata_dict: Dict) - def update_results_with_data_specific_info(self, batch: Dict, results: Dict) -> None: if all(key.value in batch.keys() for key in [SlideKey.OFFSET, SlideKey.TILE_LOCATION, SlideKey.TILE_SIZE]): n_slides = len(batch[SlideKey.SLIDE_ID]) - metadata_dict = { + metadata_dict: Dict[str, List[Union[int, str]]] = { ResultsKey.TILE_TOP: [], ResultsKey.TILE_BOTTOM: [], ResultsKey.TILE_LEFT: [], diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py 
b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 98b0ac025..0d764b7d3 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -32,7 +32,7 @@ def array_collate(batch: List) -> Any: constant_keys = slide_metadata_keys for key in batch[0][0].keys(): if key not in slide_metadata_keys: - if type(batch[0][0][key]) == np.ndarray: + if isinstance(batch[0][0][key], np.ndarray): collate_keys.append(key) else: logging.warning(f'Only np.ndarray are collated - {key} value will be taken from first patch') From 4a970893c4d785d2a38ccef6d312fc04e353786b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Jul 2022 22:17:22 +0000 Subject: [PATCH 27/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hi-ml-histopathology/src/histopathology/models/deepmil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hi-ml-histopathology/src/histopathology/models/deepmil.py b/hi-ml-histopathology/src/histopathology/models/deepmil.py index 4c9e3f176..18878fd1e 100644 --- a/hi-ml-histopathology/src/histopathology/models/deepmil.py +++ b/hi-ml-histopathology/src/histopathology/models/deepmil.py @@ -357,7 +357,7 @@ def get_empty_lists(shape: int, n: int) -> List: @staticmethod def get_patch_coordinate(slide_offset: List, patch_location: List[int], patch_size: List[int] - ) -> Tuple[int, int, int, int]: + ) -> Tuple[int, int, int, int]: """ computing absolute patch coordinate """ # PATCH_LOCATION is expected to have shape [y, x] top = slide_offset[0] + patch_location[0] From 50a58c04e10ab10ea31357be6e09e978e449eced Mon Sep 17 00:00:00 2001 From: vale-salvatelli Date: Wed, 6 Jul 2022 23:45:48 +0000 Subject: [PATCH 28/29] fixing issue with unexpected batch size due to changes in MONAI dev --- .../src/histopathology/datamodules/base_module.py | 2 +- 
.../src/histopathology/utils/wsi_utils.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py index 19c021eb6..cc6f8e119 100644 --- a/hi-ml-histopathology/src/histopathology/datamodules/base_module.py +++ b/hi-ml-histopathology/src/histopathology/datamodules/base_module.py @@ -317,7 +317,7 @@ def _load_dataset(self, slides_dataset: SlidesDataset, stage: ModelKey) -> Datas constant_values=self.background_val, overlap=self.overlap, # type: ignore threshold=self.intensity_threshold, - max_offset=max_offset, + offset=max_offset, ) base_transform = Compose([load_image_transform, grid_transform]) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 0d764b7d3..900833d53 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -6,6 +6,7 @@ from histopathology.utils.naming import SlideKey from health_ml.utils.bag_utils import multibag_collate from monai.utils import WSIPatchKeys +from monai.data import MetaTensor slide_metadata_keys = [ SlideKey.IMAGE_PATH, @@ -26,16 +27,17 @@ def array_collate(batch: List) -> Any: Combine instances from a list of dicts into a single dict, by stacking arrays along first dim [{'image' : 3xHxW}, {'image' : 3xHxW}, {'image' : 3xHxW}...] - > {'image' : Nx3xHxW} followed by the default collate which will form a batch BxNx3xHxW. It also convert some values to tensors. - The list of dicts refers to the the list of tiles produced by GridPatch transform applied on a WSI. + The list of dicts refers to the list of tiles produced by GridPatch transform applied on a WSI. 
""" collate_keys = [] constant_keys = slide_metadata_keys for key in batch[0][0].keys(): if key not in slide_metadata_keys: - if isinstance(batch[0][0][key], np.ndarray): + if isinstance(batch[0][0][key], np.ndarray) or isinstance(batch[0][0][key], MetaTensor): collate_keys.append(key) else: - logging.warning(f'Only np.ndarray are collated - {key} value will be taken from first patch') + logging.warning("Only np.ndarray and MetaTensors are collated -" + f"{key} value will be taken from first patch") constant_keys.append(key) tensor_keys = collate_keys + [SlideKey.LABEL] @@ -44,7 +46,11 @@ def array_collate(batch: List) -> Any: # we assume all patches are dictionaries with the same keys data = patch_data[0] for key in collate_keys: - data[key] = np.array([ix[key] for ix in patch_data]) + if isinstance(data[key], np.ndarray): + data[key] = np.array([ix[key] for ix in patch_data]) + elif isinstance(data[key], MetaTensor): + #TODO change how this collation happens if we have list of tensors + data[key] = np.array([ix[key].as_tensor().numpy() for ix in patch_data]) for key in tensor_keys: data[key] = torch.tensor(data[key]) new_batch.append(data) From 600dd063f1fd2689812b5b7ddeb7f34898f678d9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Jul 2022 23:48:17 +0000 Subject: [PATCH 29/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- hi-ml-histopathology/src/histopathology/utils/wsi_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py index 900833d53..6fd9636d6 100644 --- a/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py +++ b/hi-ml-histopathology/src/histopathology/utils/wsi_utils.py @@ -49,7 +49,7 @@ def array_collate(batch: List) -> Any: if isinstance(data[key], np.ndarray): data[key] = 
np.array([ix[key] for ix in patch_data]) elif isinstance(data[key], MetaTensor): - #TODO change how this collation happens if we have list of tensors + # TODO change how this collation happens if we have list of tensors data[key] = np.array([ix[key].as_tensor().numpy() for ix in patch_data]) for key in tensor_keys: data[key] = torch.tensor(data[key])