Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/cellmap_segmentation_challenge/utils/eval_utils/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,23 @@
from .config import CAST_TO_NONE


# Top-level entries that combine_scores / update_scores add to a scores dict.
# Aggregation has to skip these names: they are never crop-level result
# dicts and must not be consumed as if they were.
_AGGREGATION_KEYS = frozenset(
    (
        "label_scores",
        "overall_thing_pq",
        "overall_stuff_pq",
        "overall_score",
        "overall_instance_score",
        "overall_semantic_score",
        "total_evals",
        "num_evals_done",
        "git_version",
    )
)


def combine_scores(
scores,
include_missing=True,
Expand Down Expand Up @@ -63,6 +80,8 @@ def combine_scores(
pq_lists: dict[str, list[float]] = {}
accum: dict[str, dict] = {}
for crop_name, crop_scores in scores.items():
if crop_name in _AGGREGATION_KEYS:
continue
if not isinstance(crop_scores, dict):
continue
for label, score in crop_scores.items():
Expand Down
40 changes: 40 additions & 0 deletions tests/test_evaluate_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,46 @@ def test_combine_scores_instance_and_semantic():
assert np.isclose(combined["overall_score"], 0.8)


def test_combine_scores_idempotent_on_combined_dict():
    """Re-running combine_scores on its own output must not change the scores.

    Feeding an already-combined dict (one that carries label_scores /
    overall_* / etc.) back into combine_scores should reproduce the first
    result rather than corrupt it by treating aggregation entries as crop data.
    """
    from cellmap_segmentation_challenge import evaluate as ev

    # A single crop holding one scored instance-label result.
    instance_result = {
        "tp": 2,
        "fp": 1,
        "fn": 0,
        "sum_iou": 1.6,
        "num_voxels": 8,
        "voxel_size": (1.0, 1.0, 1.0),
        "is_missing": False,
        "status": "scored",
    }
    scores = {"crop1": {"instance": instance_result}}

    first = ev.combine_scores(
        scores,
        include_missing=True,
        instance_classes=["instance"],
    )
    # Second pass: the input is the already-combined dict from the first pass.
    second = ev.combine_scores(
        first,
        include_missing=True,
        instance_classes=["instance"],
    )

    # The overall score and the per-label values must agree across both passes.
    assert np.isclose(first["overall_score"], second["overall_score"])
    for metric in ("pq", "tp"):
        assert np.isclose(
            first["label_scores"]["instance"][metric],
            second["label_scores"]["instance"][metric],
        )


# ------------------------
# score_label & score_submission-style integration
# ------------------------
Expand Down
Loading