From 53958488cbb30a63a18d7f4b3e3ed9e0f109c852 Mon Sep 17 00:00:00 2001 From: Soumya Snigdha Kundu Date: Wed, 18 Mar 2026 13:57:13 +0000 Subject: [PATCH 1/4] Fix memory leak in `optional_import` traceback handling (#7480, #7727) When an import fails, `optional_import` captured the live traceback object and stored it in a `_LazyRaise` closure. The traceback held references to stack frames containing CUDA tensors, preventing garbage collection and causing GPU memory leaks. Replace the live traceback with a string-formatted copy via `traceback.format_exception()` and clear the original with `import_exception.__traceback__ = None`. The formatted traceback is appended to the error message so debugging information is preserved. Also move three hot-path `optional_import` calls in `monai/transforms/utils.py` (cucim.skimage, cucim morphology EDT) from function bodies to module level, eliminating repeated leaked tracebacks on every invocation. Fixes #7480, fixes #7727 Signed-off-by: Soumya Snigdha Kundu --- monai/transforms/utils.py | 24 +++++++++++------------ monai/utils/module.py | 19 +++++++++++------- tests/utils/test_optional_import.py | 30 +++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py index 9f1429d477..03702e444c 100644 --- a/monai/transforms/utils.py +++ b/monai/transforms/utils.py @@ -85,6 +85,10 @@ cp, has_cp = optional_import("cupy") cp_ndarray, _ = optional_import("cupy", name="ndarray") exposure, has_skimage = optional_import("skimage.exposure") +_cucim_skimage, _has_cucim_skimage = optional_import("cucim.skimage") +_cucim_morphology_edt, _has_cucim_morphology = optional_import( + "cucim.core.operations.morphology", name="distance_transform_edt" +) __all__ = [ "allow_missing_keys_mode", @@ -1147,11 +1151,10 @@ def get_largest_connected_component_mask( """ # use skimage/cucim.skimage and np/cp depending on whether packages are # available and input is non-cpu torch.tensor - skimage, has_cucim = optional_import("cucim.skimage") - use_cp = has_cp and has_cucim and isinstance(img, torch.Tensor) and img.device != torch.device("cpu") + use_cp = has_cp and _has_cucim_skimage and isinstance(img, torch.Tensor) and img.device != torch.device("cpu") if use_cp: img_ = convert_to_cupy(img.short()) # type: ignore - label = skimage.measure.label + label = _cucim_skimage.measure.label lib = cp else: if not has_measure: @@ -1204,13 +1207,13 @@ def keep_merge_components_with_points( margins: include points outside of the region but within the margin. """ - cucim_skimage, has_cucim = optional_import("cucim.skimage") - - use_cp = has_cp and has_cucim and isinstance(img_pos, torch.Tensor) and img_pos.device != torch.device("cpu") + use_cp = ( + has_cp and _has_cucim_skimage and isinstance(img_pos, torch.Tensor) and img_pos.device != torch.device("cpu") + ) if use_cp: img_pos_ = convert_to_cupy(img_pos.short()) # type: ignore img_neg_ = convert_to_cupy(img_neg.short()) # type: ignore - label = cucim_skimage.measure.label + label = _cucim_skimage.measure.label lib = cp else: if not has_measure: @@ -2463,10 +2466,7 @@ def distance_transform_edt( Returned only when `return_indices` is True and `indices` is not supplied. dtype np.float64. """ - distance_transform_edt, has_cucim = optional_import( - "cucim.core.operations.morphology", name="distance_transform_edt" - ) - use_cp = has_cp and has_cucim and isinstance(img, torch.Tensor) and img.device.type == "cuda" + use_cp = has_cp and _has_cucim_morphology and isinstance(img, torch.Tensor) and img.device.type == "cuda" if not return_distances and not return_indices: raise RuntimeError("Neither return_distances nor return_indices True") @@ -2499,7 +2499,7 @@ def distance_transform_edt( indices_ = convert_to_cupy(indices) img_ = convert_to_cupy(img) for channel_idx in range(img_.shape[0]): - distance_transform_edt( + _cucim_morphology_edt( img_[channel_idx], sampling=sampling, return_distances=return_distances, diff --git a/monai/utils/module.py b/monai/utils/module.py index a64f73cd6b..c8851714ce 100644 --- a/monai/utils/module.py +++ b/monai/utils/module.py @@ -17,6 +17,7 @@ import pdb import re import sys +import traceback as traceback_mod import warnings from collections.abc import Callable, Collection, Hashable, Iterable, Mapping from functools import partial, wraps @@ -368,8 +369,9 @@ def optional_import( OptionalImportError: from torch.nn.functional import conv1d (requires version '42' by 'min_version'). """ - tb = None + had_exception = False exception_str = "" + tb_str = "" if name: actual_cmd = f"from {module} import {name}" else: @@ -384,8 +386,12 @@ def optional_import( if name: # user specified to load class/function/... from the module the_module = getattr(the_module, name) except Exception as import_exception: # any exceptions during import - tb = import_exception.__traceback__ + tb_str = "".join( + traceback_mod.format_exception(type(import_exception), import_exception, import_exception.__traceback__) + ) + import_exception.__traceback__ = None exception_str = f"{import_exception}" + had_exception = True else: # found the module if version_args and version_checker(pkg, f"{version}", version_args): return the_module, True @@ -394,7 +400,7 @@ def optional_import( # preparing lazy error message msg = descriptor.format(actual_cmd) - if version and tb is None: # a pure version issue + if version and not had_exception: # a pure version issue msg += f" (requires '{module} {version}' by '{version_checker.__name__}')" if exception_str: msg += f" ({exception_str})" @@ -407,10 +413,9 @@ def __init__(self, *_args, **_kwargs): + "\n\nFor details about installing the optional dependencies, please visit:" + "\n https://monai.readthedocs.io/en/latest/installation.html#installing-the-recommended-dependencies" ) - if tb is None: - self._exception = OptionalImportError(_default_msg) - else: - self._exception = OptionalImportError(_default_msg).with_traceback(tb) + if tb_str: + _default_msg += f"\n\nOriginal traceback:\n{tb_str}" + self._exception = OptionalImportError(_default_msg) def __getattr__(self, name): """ diff --git a/tests/utils/test_optional_import.py b/tests/utils/test_optional_import.py index 2f640f88d0..d8aa55b907 100644 --- a/tests/utils/test_optional_import.py +++ b/tests/utils/test_optional_import.py @@ -11,7 +11,9 @@ from __future__ import annotations +import gc import unittest +import weakref from parameterized import parameterized @@ -75,6 +77,34 @@ def versioning(module, ver, a): nn, flag = optional_import("torch", "1.1", version_checker=versioning, name="nn", version_args=test_args) self.assertTrue(flag) + def test_no_traceback_leak(self): + """Verify optional_import does not retain references to stack frames (issue #7480).""" + + class _Marker: + pass + + def _do_import(): + marker = _Marker() + ref = weakref.ref(marker) + # Call optional_import for a module that does not exist. + # If the traceback is leaked, `marker` stays alive via frame references. + mod, flag = optional_import("nonexistent_module_for_leak_test") + self.assertFalse(flag) + return ref + + ref = _do_import() + gc.collect() + self.assertIsNone(ref(), "optional_import is leaking frame references via traceback") + + def test_failed_import_shows_traceback_string(self): + """Verify the error message includes the original traceback as a string.""" + mod, flag = optional_import("nonexistent_module_for_tb_test") + self.assertFalse(flag) + with self.assertRaises(OptionalImportError) as ctx: + mod.something + self.assertIn("Original traceback", str(ctx.exception)) + self.assertIn("ModuleNotFoundError", str(ctx.exception)) + if __name__ == "__main__": unittest.main() From 706cc973e5d9eaaab22cdef3a236c461f2d5ed6b Mon Sep 17 00:00:00 2001 From: Soumya Snigdha Kundu Date: Sun, 12 Apr 2026 12:00:32 +0100 Subject: [PATCH 2/4] Revert cucim module-level imports per review feedback Restore cucim imports inside functions to avoid slow import times and buggy behaviour. Add comment explaining why cucim is not at module level. Also rename unused `mod` to `_mod` in test per CodeRabbit nitpick. Signed-off-by: Soumya Snigdha Kundu --- monai/transforms/utils.py | 27 +++++++++++++++------------ tests/utils/test_optional_import.py | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py index 03702e444c..abc692c768 100644 --- a/monai/transforms/utils.py +++ b/monai/transforms/utils.py @@ -85,10 +85,9 @@ cp, has_cp = optional_import("cupy") cp_ndarray, _ = optional_import("cupy", name="ndarray") exposure, has_skimage = optional_import("skimage.exposure") -_cucim_skimage, _has_cucim_skimage = optional_import("cucim.skimage") -_cucim_morphology_edt, _has_cucim_morphology = optional_import( - "cucim.core.operations.morphology", name="distance_transform_edt" -) +# NOTE: cucim is deliberately NOT imported at module level. +# Module-level cucim imports caused very slow import times and other buggy behaviour. +# Keep cucim imports inside the functions that need them. __all__ = [ "allow_missing_keys_mode", @@ -1151,10 +1150,11 @@ def get_largest_connected_component_mask( """ # use skimage/cucim.skimage and np/cp depending on whether packages are # available and input is non-cpu torch.tensor - use_cp = has_cp and _has_cucim_skimage and isinstance(img, torch.Tensor) and img.device != torch.device("cpu") + skimage, has_cucim = optional_import("cucim.skimage") + use_cp = has_cp and has_cucim and isinstance(img, torch.Tensor) and img.device != torch.device("cpu") if use_cp: img_ = convert_to_cupy(img.short()) # type: ignore - label = _cucim_skimage.measure.label + label = skimage.measure.label lib = cp else: if not has_measure: @@ -1207,13 +1207,13 @@ def keep_merge_components_with_points( margins: include points outside of the region but within the margin. """ - use_cp = ( - has_cp and _has_cucim_skimage and isinstance(img_pos, torch.Tensor) and img_pos.device != torch.device("cpu") - ) + cucim_skimage, has_cucim = optional_import("cucim.skimage") + + use_cp = has_cp and has_cucim and isinstance(img_pos, torch.Tensor) and img_pos.device != torch.device("cpu") if use_cp: img_pos_ = convert_to_cupy(img_pos.short()) # type: ignore img_neg_ = convert_to_cupy(img_neg.short()) # type: ignore - label = _cucim_skimage.measure.label + label = cucim_skimage.measure.label lib = cp else: if not has_measure: @@ -2466,7 +2466,10 @@ def distance_transform_edt( Returned only when `return_indices` is True and `indices` is not supplied. dtype np.float64. """ - use_cp = has_cp and _has_cucim_morphology and isinstance(img, torch.Tensor) and img.device.type == "cuda" + distance_transform_edt, has_cucim = optional_import( + "cucim.core.operations.morphology", name="distance_transform_edt" + ) + use_cp = has_cp and has_cucim and isinstance(img, torch.Tensor) and img.device.type == "cuda" if not return_distances and not return_indices: raise RuntimeError("Neither return_distances nor return_indices True") @@ -2499,7 +2502,7 @@ def distance_transform_edt( indices_ = convert_to_cupy(indices) img_ = convert_to_cupy(img) for channel_idx in range(img_.shape[0]): - _cucim_morphology_edt( + distance_transform_edt( img_[channel_idx], sampling=sampling, return_distances=return_distances, diff --git a/tests/utils/test_optional_import.py b/tests/utils/test_optional_import.py index d8aa55b907..3594d3e67e 100644 --- a/tests/utils/test_optional_import.py +++ b/tests/utils/test_optional_import.py @@ -88,7 +88,7 @@ def _do_import(): ref = weakref.ref(marker) # Call optional_import for a module that does not exist. # If the traceback is leaked, `marker` stays alive via frame references. - mod, flag = optional_import("nonexistent_module_for_leak_test") + _mod, flag = optional_import("nonexistent_module_for_leak_test") self.assertFalse(flag) return ref From b45e9112395d46830b15aff531a137d0b6023a15 Mon Sep 17 00:00:00 2001 From: Soumya Snigdha Kundu Date: Tue, 14 Apr 2026 13:13:45 +0100 Subject: [PATCH 3/4] Update tests/utils/test_optional_import.py with feedback Co-authored-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Signed-off-by: Soumya Snigdha Kundu --- tests/utils/test_optional_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_optional_import.py b/tests/utils/test_optional_import.py index 3594d3e67e..e482669051 100644 --- a/tests/utils/test_optional_import.py +++ b/tests/utils/test_optional_import.py @@ -101,7 +101,7 @@ def test_failed_import_shows_traceback_string(self): mod, flag = optional_import("nonexistent_module_for_tb_test") self.assertFalse(flag) with self.assertRaises(OptionalImportError) as ctx: - mod.something + _ = getattr(mod, "something") self.assertIn("Original traceback", str(ctx.exception)) self.assertIn("ModuleNotFoundError", str(ctx.exception)) From 880b6a989504f877e95af857cd711f8ce2e5c5a3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 12:16:33 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/utils/test_optional_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_optional_import.py b/tests/utils/test_optional_import.py index e482669051..b5bc914c92 100644 --- a/tests/utils/test_optional_import.py +++ b/tests/utils/test_optional_import.py @@ -101,7 +101,7 @@ def test_failed_import_shows_traceback_string(self): mod, flag = optional_import("nonexistent_module_for_tb_test") self.assertFalse(flag) with self.assertRaises(OptionalImportError) as ctx: - _ = getattr(mod, "something") + _ = mod.something self.assertIn("Original traceback", str(ctx.exception)) self.assertIn("ModuleNotFoundError", str(ctx.exception))