From 0a0659539aa92e43fdd7926fc2045362aecba9f9 Mon Sep 17 00:00:00 2001 From: Jerry Wang Date: Sat, 16 May 2026 20:48:23 +0800 Subject: [PATCH] Make kmedoids dependency optional with lazy import The top-level 'import kmedoids' in core/initial_selection_strategies made every active-learning strategy depend on the kmedoids package because Hydra imports the whole module to instantiate any strategy. This broke run_config.py in environments that hadn't installed kmedoids yet, even when the user was not selecting k-medoids. Defer the import to KMedoidsInitialSelection.__init__ (fail-fast for opted-in users, invisible for everyone else) and move 'kmedoids' from required dependencies to an optional extra '[kmedoids]' so the default install no longer pulls it in. --- core/initial_selection_strategies.py | 16 +++++++++++++++- pyproject.toml | 4 +++- uv.lock | 8 +++++--- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/core/initial_selection_strategies.py b/core/initial_selection_strategies.py index 074605f..62b505f 100644 --- a/core/initial_selection_strategies.py +++ b/core/initial_selection_strategies.py @@ -5,7 +5,6 @@ import logging from abc import ABC, abstractmethod -import kmedoids import numpy as np from sklearn.cluster import KMeans, MiniBatchKMeans from sklearn.metrics import pairwise_distances, pairwise_distances_argmin_min @@ -142,6 +141,19 @@ def __init__( max_iter: int = 100, init: str = "build", ) -> None: + # Imported lazily so that environments without the optional + # ``kmedoids`` package can still use the other strategies. Hydra + # only constructs this class when it's actually selected, so the + # ImportError surfaces immediately and only for users who opted + # in to k-medoids. + try: + import kmedoids # noqa: F401 (probe install, used in _kmedoids) + except ImportError as exc: # pragma: no cover - exercised only without dep + raise ImportError( + "KMedoidsInitialSelection requires the 'kmedoids' package. " + "Install it with `uv pip install kmedoids` (or `uv sync`)." + ) from exc + super().__init__("KMEDOIDS", starting_batch_size=starting_batch_size) self.seed = seed self.metric = metric @@ -163,6 +175,8 @@ def select( return selected def _kmedoids(self, embeddings: np.ndarray) -> list[int]: + import kmedoids # already validated in __init__ + num_samples = embeddings.shape[0] k = min(self.starting_batch_size, num_samples) if k == 0: diff --git a/pyproject.toml b/pyproject.toml index e89b864..b54eca8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ classifiers = [ dependencies = [ "botorch>=0.16.1", "hydra-core>=1.3.2", - "kmedoids>=0.5.1", "numpy>=1.21.0", "pandas>=1.3.0", "scikit-learn>=1.6.0", @@ -42,6 +41,9 @@ cluster = [ "hydra-submitit-launcher>=1.2.0", "submitit>=1.5.3", ] +kmedoids = [ + "kmedoids>=0.5.1", +] [project.urls] Homepage = "https://github.com/cellethology/deepdraw" diff --git a/uv.lock b/uv.lock index a69009b..6e90384 100644 --- a/uv.lock +++ b/uv.lock @@ -392,7 +392,6 @@ source = { editable = "." } dependencies = [ { name = "botorch" }, { name = "hydra-core" }, - { name = "kmedoids" }, { name = "numpy" }, { name = "pandas" }, { name = "scikit-learn" }, @@ -408,6 +407,9 @@ cluster = [ { name = "hydra-submitit-launcher" }, { name = "submitit" }, ] +kmedoids = [ + { name = "kmedoids" }, +] [package.dev-dependencies] dev = [ @@ -429,7 +431,7 @@ requires-dist = [ { name = "botorch", specifier = ">=0.16.1" }, { name = "hydra-core", specifier = ">=1.3.2" }, { name = "hydra-submitit-launcher", marker = "extra == 'cluster'", specifier = ">=1.2.0" }, - { name = "kmedoids", specifier = ">=0.5.1" }, + { name = "kmedoids", marker = "extra == 'kmedoids'", specifier = ">=0.5.1" }, { name = "numpy", specifier = ">=1.21.0" }, { name = "openpyxl", marker = "extra == 'analysis'", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=1.3.0" }, @@ -438,7 +440,7 @@ requires-dist = [ { name = "submitit", marker = "extra == 'cluster'", specifier = ">=1.5.3" }, { name = "tqdm", marker = "extra == 'analysis'", specifier = "<=4.66.5" }, ] -provides-extras = ["analysis", "cluster"] +provides-extras = ["analysis", "cluster", "kmedoids"] [package.metadata.requires-dev] dev = [