From 557eb99c961f741ea790378f4a09b8c0dd497761 Mon Sep 17 00:00:00 2001 From: drgmo Date: Thu, 15 Jan 2026 14:20:43 +0000 Subject: [PATCH 1/5] added function to align permutated, but same coords --- src/stamp/encoding/encoder/eagle.py | 58 ++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/src/stamp/encoding/encoder/eagle.py b/src/stamp/encoding/encoder/eagle.py index b2fb293d..e294f491 100644 --- a/src/stamp/encoding/encoder/eagle.py +++ b/src/stamp/encoding/encoder/eagle.py @@ -1,5 +1,6 @@ import logging import os +from collections import defaultdict, deque from pathlib import Path import numpy as np @@ -59,11 +60,26 @@ def _validate_and_read_features_with_agg( f"Features located in {h5_vir2} are extracted with {extractor}" ) - if feats.shape[0] != agg_feats.shape[0]: - raise ValueError( - f"Number of ctranspath features and virchow2 features do not match:" - f" {feats.shape[0]} != {agg_feats.shape[0]}" - ) + # if feats.shape[0] != agg_feats.shape[0]: + # raise ValueError( + # f"Number of ctranspath features and virchow2 features do not match:" + # f" {feats.shape[0]} != {agg_feats.shape[0]}" + # ) + if not np.allclose(coords.coords_um, agg_coords.coords_um, atol=1e-5, rtol=0): + # Try to fix permutation by aligning virchow2 to ctp coords + try: + agg_feats, aligned_agg_coords = _align_vir2_to_ctp_by_coords( + ref_coords_um=coords.coords_um, + other_coords_um=agg_coords.coords_um, + other_feats=agg_feats, + decimals=4, # tune if needed + ) + agg_coords.coords_um = aligned_agg_coords # optional, for debugging + except ValueError as e: + raise ValueError( + f"Coordinates mismatch between ctranspath and virchow2 features for slide " + f"{slide_name}. Alignment attempt failed: {e}" + ) if not np.allclose(coords.coords_um, agg_coords.coords_um, atol=1e-5, rtol=0): raise ValueError( @@ -238,3 +254,35 @@ def encode_patients_( self._save_features_( output_path=output_path, feats=patient_embedding, feat_type="patient" ) + +def _align_vir2_to_ctp_by_coords( + ref_coords_um: np.ndarray, + other_coords_um: np.ndarray, + other_feats: torch.Tensor, + decimals: int = 4, +) -> tuple[torch.Tensor, np.ndarray]: + """Align vir2 features to ctp features based on coordinates.""" + ref = np.round(np.asarray(ref_coords_um, dtype=np.float64), decimals) + oth = np.round(np.asarray(other_coords_um, dtype=np.float64), decimals) + + # coord -> queue(indices) + buckets = defaultdict(deque) + for j, key in enumerate(map(tuple, oth)): + buckets[key].append(j) + + perm = np.empty(ref.shape[0], dtype=np.int64) + for i, key in enumerate(map(tuple, ref)): + if not buckets[key]: + raise ValueError(f"Missing coord in other set: {key}") + perm[i] = buckets[key].popleft() + + # optional: check if other has extras not used + unused = sum(len(q) for q in buckets.values()) + if unused != 0: + raise ValueError(f"virchow2 features contain {unused} extra coords not in ref.") + + perm_t = torch.as_tensor(perm, dtype=torch.long, device=other_feats.device) + aligned_feats = other_feats.index_select(0, perm_t) + aligned_coords = other_coords_um[perm] + print("") + return aligned_feats, aligned_coords \ No newline at end of file From 9395bd12e36586a4200cd3cd5bd561c7d014fd44 Mon Sep 17 00:00:00 2001 From: drgmo Date: Thu, 15 Jan 2026 15:15:42 +0000 Subject: [PATCH 2/5] rounding corrected --- src/stamp/encoding/encoder/eagle.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/stamp/encoding/encoder/eagle.py b/src/stamp/encoding/encoder/eagle.py index e294f491..240a0546 100644 --- a/src/stamp/encoding/encoder/eagle.py +++ b/src/stamp/encoding/encoder/eagle.py @@ -72,7 +72,7 @@ def _validate_and_read_features_with_agg( ref_coords_um=coords.coords_um, other_coords_um=agg_coords.coords_um, other_feats=agg_feats, - decimals=4, # tune if needed + decimals=5, ) agg_coords.coords_um = aligned_agg_coords # optional, for debugging except ValueError as e: @@ -160,7 +160,7 @@ def encode_slides_( for tile_feats_filename in (progress := tqdm(os.listdir(feat_dir))): h5_ctp = os.path.join(feat_dir, tile_feats_filename) h5_vir2 = os.path.join(agg_feat_dir, tile_feats_filename) - slide_name: str = Path(tile_feats_filename).stem + slide_name: str = Path(tile_feats_filename).name progress.set_description(slide_name) # skip patient in case feature file already exists @@ -259,7 +259,7 @@ def _align_vir2_to_ctp_by_coords( ref_coords_um: np.ndarray, other_coords_um: np.ndarray, other_feats: torch.Tensor, - decimals: int = 4, + decimals: int = 5, ) -> tuple[torch.Tensor, np.ndarray]: """Align vir2 features to ctp features based on coordinates.""" ref = np.round(np.asarray(ref_coords_um, dtype=np.float64), decimals) @@ -282,7 +282,8 @@ def _align_vir2_to_ctp_by_coords( raise ValueError(f"virchow2 features contain {unused} extra coords not in ref.") perm_t = torch.as_tensor(perm, dtype=torch.long, device=other_feats.device) + # Align features according to the permutation as well ! aligned_feats = other_feats.index_select(0, perm_t) aligned_coords = other_coords_um[perm] print("") - return aligned_feats, aligned_coords \ No newline at end of file + return aligned_feats, aligned_coords From 488e22902fd86574971035013ca326c1a0699bc6 Mon Sep 17 00:00:00 2001 From: drgmo Date: Thu, 15 Jan 2026 20:06:13 +0000 Subject: [PATCH 3/5] formatted --- src/stamp/encoding/encoder/eagle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stamp/encoding/encoder/eagle.py b/src/stamp/encoding/encoder/eagle.py index 240a0546..d966c84e 100644 --- a/src/stamp/encoding/encoder/eagle.py +++ b/src/stamp/encoding/encoder/eagle.py @@ -255,6 +255,7 @@ def encode_patients_( output_path=output_path, feats=patient_embedding, feat_type="patient" ) + def _align_vir2_to_ctp_by_coords( ref_coords_um: np.ndarray, other_coords_um: np.ndarray, From 32c81013cffbdb05b8615dc5787a69924968dfe5 Mon Sep 17 00:00:00 2001 From: drgmo Date: Sun, 18 Jan 2026 21:31:39 +0000 Subject: [PATCH 4/5] replaced resolve_extractorname() so that hash-tags are removed for clean extractor names --- src/stamp/encoding/encoder/__init__.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/stamp/encoding/encoder/__init__.py b/src/stamp/encoding/encoder/__init__.py index ca124214..d5e1b296 100644 --- a/src/stamp/encoding/encoder/__init__.py +++ b/src/stamp/encoding/encoder/__init__.py @@ -218,31 +218,21 @@ def _save_features_( _logger.debug(f"saved features to {output_path}") -def _resolve_extractor_name(raw: str) -> ExtractorName: - """ - Resolve an extractor string to a valid ExtractorName. - - Handles: - - exact matches ('gigapath', 'virchow-full') - - versioned strings like 'gigapath-ae23d', 'virchow-full-2025abc' - Raises ValueError if the base name is not recognized. - """ +def _resolve_extractorname(raw: str) -> ExtractorName: if not raw: raise ValueError("Empty extractor string") name = str(raw).strip().lower() + name = name.replace("", "-") - # Exact match for e in ExtractorName: if name == e.value.lower(): return e - # Versioned form: '-something' for e in ExtractorName: if name.startswith(e.value.lower() + "-"): return e - # Otherwise fail raise ValueError( f"Unknown extractor '{raw}'. " f"Expected one of {[e.value for e in ExtractorName]} " From f008ae1da0e0280104ccce2baa6a900e1c0233e1 Mon Sep 17 00:00:00 2001 From: drgmo Date: Sun, 18 Jan 2026 22:14:08 +0000 Subject: [PATCH 5/5] fix name resolving --- src/stamp/encoding/encoder/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stamp/encoding/encoder/__init__.py b/src/stamp/encoding/encoder/__init__.py index d5e1b296..9e76af58 100644 --- a/src/stamp/encoding/encoder/__init__.py +++ b/src/stamp/encoding/encoder/__init__.py @@ -218,12 +218,12 @@ def _save_features_( _logger.debug(f"saved features to {output_path}") -def _resolve_extractorname(raw: str) -> ExtractorName: +def _resolve_extractor_name(raw: str) -> ExtractorName: if not raw: raise ValueError("Empty extractor string") name = str(raw).strip().lower() - name = name.replace("", "-") + name = name.replace("_", "-") for e in ExtractorName: if name == e.value.lower():