From cb4557339ffc9205bdf652044e04b2dea089b531 Mon Sep 17 00:00:00 2001
From: lapertor <raphael.lapertot@gmail.com>
Date: Tue, 12 May 2026 10:51:38 +0200
Subject: [PATCH 1/5] Enh: add IncrementalWatershedCut for interactive seeded
 watershed

Add `incremental_watershed_cut` C++ class and `IncrementalWatershedCut`
Python class that maintain a cached BPT to update seeded watershed cuts
incrementally, avoiding a full recomputation on each seed change.

Based on the algorithm described in:
  Q. Lebon, J. Lefevre, J. Cousty, B. Perret.
  'Interactive Segmentation With Incremental Watershed Cuts', CIARP 2023.
  https://hal.science/hal-04069187v1

Signed-off-by: lapertor <raphael.lapertot@gmail.com>
---
 higra/algo/py_watershed.cpp             |  32 +++
 higra/algo/watershed.py                 |  66 ++++++
 include/higra/algo/watershed.hpp        | 277 ++++++++++++++++++++++++
 test/cpp/algo/test_watershed.cpp        | 116 ++++++++++
 test/python/test_algo/test_watershed.py | 173 +++++++++++++++
 5 files changed, 664 insertions(+)
diff --git a/higra/algo/py_watershed.cpp b/higra/algo/py_watershed.cpp
index 24402710..937f8a08 100644
--- a/higra/algo/py_watershed.cpp
+++ b/higra/algo/py_watershed.cpp
@@ -62,6 +62,38 @@ namespace py_watershed {
 
         add_type_overloads<def_labelisation_watershed<hg::ugraph>, HG_TEMPLATE_NUMERIC_TYPES>(m, "");
         add_type_overloads<def_labelisation_seeded_watershed<hg::ugraph>, HG_TEMPLATE_NUMERIC_TYPES>(m, "");
+
+        auto cls = py::class_<hg::incremental_watershed_cut>(m, "IncrementalWatershedCut",
+                "Incremental seeded watershed cut based on the binary partition tree.\n\n"
+                "Provides efficient computation of seeded watershed cuts in an interactive "
+                "segmentation setting, where seeds are added and removed incrementally.");
+
+        cls.def(py::init<const hg::tree &, const hg::ugraph &>(),
+                py::arg("bpt"),
+                py::arg("mst"),
+                "Create an incremental watershed cut object from a binary partition tree and its MST.");
+
+        cls.def("_add_seeds",
+                [](hg::incremental_watershed_cut &self,
+                   const pyarray<hg::index_t> &seed_vertices,
+                   const pyarray<hg::index_t> &seed_labels) {
+                    self.add_seeds(seed_vertices, seed_labels);
+                },
+                "Add seeds with given labels.",
+                py::arg("seed_vertices"),
+                py::arg("seed_labels"));
+
+        cls.def("_remove_seeds",
+                [](hg::incremental_watershed_cut &self,
+                   const pyarray<hg::index_t> &seed_vertices) {
+                    self.remove_seeds(seed_vertices);
+                },
+                "Remove seeds.",
+                py::arg("seed_vertices"));
+
+        cls.def("_get_labeling",
+                &hg::incremental_watershed_cut::get_labeling,
+                "Compute and return the current vertex labeling.");
     }
 }
 
diff --git a/higra/algo/watershed.py b/higra/algo/watershed.py
index a3530c62..18e0dc79 100644
--- a/higra/algo/watershed.py
+++ b/higra/algo/watershed.py
@@ -73,3 +73,69 @@ def labelisation_seeded_watershed(graph, edge_weights, vertex_seeds, background_
 
     labels = hg.delinearize_vertex_weights(labels, graph)
     return labels
+
+
+class IncrementalWatershedCut:
+    """
+    Incremental seeded watershed cut based on the binary partition tree.
+
+    This class provides an efficient way to compute seeded watershed cuts
+    in an interactive segmentation setting, where seeds are added and removed
+    incrementally. Instead of recomputing the full watershed from scratch at
+    each interaction, only the affected regions are updated.
+
+    The algorithm maintains a canonical BPT and a visitCount array to identify
+    watershed edges. The labeling is obtained by BFS on the MST forest.
+
+    Reference:
+
+        Q. Lebon, J. Lefevre, J. Cousty, B. Perret.
+        `Interactive Segmentation With Incremental Watershed Cuts <https://hal.science/hal-04069187v1>`_.
+        CIARP 2023.
+
+    :param graph: input graph (must be connected)
+    :param edge_weights: Weights on the edges of the graph
+    """
+
+    def __init__(self, graph, edge_weights):
+        tree, _ = hg.bpt_canonical(graph, edge_weights, compute_mst=True)
+        mst = hg.CptBinaryHierarchy.get_mst(tree)
+        self._impl = hg.cpp.IncrementalWatershedCut(tree, mst)
+        self._graph = graph
+
+    def add_seeds(self, seed_vertices, seed_labels):
+        """
+        Add seeds to the current watershed cut.
+
+        Each seed is defined by a vertex index and a label. Two seeds cannot share
+        the same vertex but can share the same label (resulting in merged regions
+        in the output labeling). Labels must be non-zero (0 is reserved for
+        unlabeled/background vertices).
+
+        :param seed_vertices: 1d array of seed vertex indices
+        :param seed_labels: 1d array of seed labels (same size as seed_vertices)
+        """
+        seed_vertices = np.asarray(seed_vertices, dtype=np.int64).ravel()
+        seed_labels = np.asarray(seed_labels, dtype=np.int64).ravel()
+        self._impl._add_seeds(seed_vertices, seed_labels)
+
+    def remove_seeds(self, seed_vertices):
+        """
+        Remove seeds from the current watershed cut.
+
+        :param seed_vertices: 1d array of seed vertex indices to remove
+        """
+        seed_vertices = np.asarray(seed_vertices, dtype=np.int64).ravel()
+        self._impl._remove_seeds(seed_vertices)
+
+    def get_labeling(self):
+        """
+        Compute and return the current vertex labeling.
+
+        Vertices with no seed in their component are labeled 0 (background).
+
+        :return: A labeling of the graph vertices
+        """
+        labels = self._impl._get_labeling()
+        labels = hg.delinearize_vertex_weights(labels, self._graph)
+        return labels
diff --git a/include/higra/algo/watershed.hpp b/include/higra/algo/watershed.hpp
index 07e656ca..8be35e53 100644
--- a/include/higra/algo/watershed.hpp
+++ b/include/higra/algo/watershed.hpp
@@ -14,9 +14,13 @@
 #include "../graph.hpp"
 #include "../structure/array.hpp"
 #include "higra/structure/unionfind.hpp"
+#include "higra/hierarchy/hierarchy_core.hpp"
+#include "higra/algo/graph_core.hpp"
 #include "higra/sorting.hpp"
 #include <vector>
 #include <stack>
+#include <queue>
+#include <unordered_map>
 
 namespace hg {
 
@@ -171,4 +175,277 @@ namespace hg {
         return labels;
     };
 
+
+    /**
+     * Incremental seeded watershed cut based on the binary partition tree.
+     *
+     * This class provides an efficient way to compute seeded watershed cuts
+     * in an interactive segmentation setting, where seeds are added and removed
+     * incrementally.
+     *
+     * The algorithm maintains a canonical BPT and a visitCount array to identify
+     * watershed edges without recomputing from scratch at each interaction.
+     * The labeling is cached and updated locally when seeds change.
+     *
+     * Reference:
+     * Quentin Lebon, Josselin Lefevre, Jean Cousty, Benjamin Perret.
+     * Interactive Segmentation With Incremental Watershed Cuts.
+     * CIARP 2023.
+     */
+    class incremental_watershed_cut {
+
+    public:
+
+        /**
+         * Create an incremental watershed cut object from a BPT and its
+         * associated MST.
+         *
+         * The MST edges must be ordered consistently with the BPT internal
+         * nodes: MST edge i corresponds to BPT internal node (num_leaves + i).
+         * This ordering is guaranteed when the MST is built from bpt_canonical
+         * via subgraph_spanning.
+         *
+         * @param bpt canonical binary partition tree
+         * @param mst minimum spanning tree of the original graph (as ugraph)
+         */
+        incremental_watershed_cut(tree bpt, ugraph mst) :
+                m_bpt(std::move(bpt)),
+                m_mst(std::move(mst)),
+                m_num_leaves((index_t)num_leaves(m_bpt)),
+                m_root((index_t)hg::num_vertices(m_bpt) - 1),
+                m_visit_count(hg::num_vertices(m_bpt), 0),
+                m_is_cut(m_num_leaves - 1, false),
+                m_labels(xt::zeros<index_t>({(size_t)m_num_leaves})) {
+            HG_TRACE();
+            hg_assert((index_t)num_vertices(m_mst) == m_num_leaves,
+                      "MST must have the same number of vertices as leaves in the BPT.");
+            hg_assert((index_t)num_edges(m_mst) == m_num_leaves - 1,
+                      "MST must have exactly num_leaves - 1 edges.");
+        }
+
+        /**
+         * Add seeds to the current watershed cut.
+         *
+         * Each seed is defined by a vertex and a label. Two seeds cannot share
+         * the same vertex but can share the same label (resulting in merged regions
+         * in the output labeling). Labels must be non-zero (0 is reserved for
+         * unlabeled/background vertices).
+         *
+         * @tparam T1 integral type for seed vertices
+         * @tparam T2 integral type for seed labels
+         * @param xseed_vertices 1d array of seed vertex indices
+         * @param xseed_labels 1d array of seed labels (same size as seed_vertices)
+         */
+        template<typename T1, typename T2>
+        void add_seeds(const xt::xexpression<T1> &xseed_vertices,
+                       const xt::xexpression<T2> &xseed_labels) {
+            HG_TRACE();
+            auto &seed_vertices = xseed_vertices.derived_cast();
+            auto &seed_labels = xseed_labels.derived_cast();
+            hg_assert_1d_array(seed_vertices);
+            hg_assert_1d_array(seed_labels);
+            hg_assert(seed_vertices.size() == seed_labels.size(),
+                      "seed_vertices and seed_labels must have the same size.");
+
+            for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
+                auto v = (index_t)seed_vertices(i);
+                auto l = (index_t)seed_labels(i);
+                hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
+                hg_assert(l != 0, "Seed label must be non-zero (0 is reserved for background).");
+                hg_assert(m_seed_labels.find(v) == m_seed_labels.end(),
+                          "Vertex is already a seed.");
+
+                m_seed_labels[v] = l;
+
+                // Algorithm 1 (Lebon et al.): walk up BPT, increment visitCount
+                index_t n = v;
+                while (n != m_root && m_visit_count[n] != 2) {
+                    n = m_bpt.parent(n);
+                    m_visit_count[n] += 1;
+                    if (m_visit_count[n] == 2) {
+                        m_is_cut[n - m_num_leaves] = true;
+                    }
+                }
+
+                // Local relabeling: BFS from v in its component
+                relabel_component_from_seed(v, l);
+            }
+        }
+
+        /**
+         * Remove seeds from the current watershed cut.
+         *
+         * @tparam T1 integral type for seed vertices
+         * @param xseed_vertices 1d array of seed vertex indices to remove
+         */
+        template<typename T1>
+        void remove_seeds(const xt::xexpression<T1> &xseed_vertices) {
+            HG_TRACE();
+            auto &seed_vertices = xseed_vertices.derived_cast();
+            hg_assert_1d_array(seed_vertices);
+
+            for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
+                auto v = (index_t)seed_vertices(i);
+                hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
+                hg_assert(m_seed_labels.find(v) != m_seed_labels.end(),
+                          "Vertex is not a seed.");
+
+                m_seed_labels.erase(v);
+
+                // Algorithm 2 (Lebon et al.): walk up BPT, decrement visitCount
+                index_t n = v;
+                while (n != m_root && m_visit_count[n] != 1) {
+                    n = m_bpt.parent(n);
+                    m_visit_count[n] -= 1;
+                    if (m_visit_count[n] == 1) {
+                        m_is_cut[n - m_num_leaves] = false;
+                    }
+                }
+
+                // Local relabeling: find merged component and relabel from remaining seeds
+                relabel_merged_component(v);
+            }
+        }
+
+        /**
+         * Return the current vertex labeling.
+         *
+         * The labeling is maintained incrementally by add_seeds and remove_seeds.
+         * Vertices with no seed in their component are labeled 0 (background).
+         *
+         * @return 1d array of labels on graph vertices
+         */
+        const array_1d<index_t> &get_labeling() const {
+            return m_labels;
+        }
+
+    private:
+
+        /**
+         * BFS from seed vertex v, labeling all reachable vertices (not crossing
+         * cut edges) with the given label.
+         *
+         * After add_seeds creates new cuts, v's component is guaranteed to
+         * contain no other seed (the binary tree structure of the BPT ensures
+         * that visitCount == 2 at the LCA of v and any existing seed).
+         */
+        void relabel_component_from_seed(index_t v, index_t label) {
+            std::queue<index_t> queue;
+            m_labels(v) = label;
+            queue.push(v);
+            while (!queue.empty()) {
+                auto u = queue.front();
+                queue.pop();
+                for (auto e : out_edge_iterator(u, m_mst)) {
+                    auto neighbor = target(e, m_mst);
+                    auto edge_idx = index(e, m_mst);
+                    if (!m_is_cut[edge_idx] && m_labels(neighbor) != label) {
+                        m_labels(neighbor) = label;
+                        queue.push(neighbor);
+                    }
+                }
+            }
+        }
+
+        /**
+         * After removing a seed at vertex v: find the merged component
+         * (BFS from v respecting current cuts), reset labels to 0, then
+         * relabel from all remaining seeds in the component.
+         */
+        void relabel_merged_component(index_t v) {
+            // Step 1: BFS from v to find the merged component and collect seeds
+            std::queue<index_t> queue;
+            std::vector<index_t> component;
+            std::vector<std::pair<index_t, index_t>> seeds_in_component;
+
+            m_labels(v) = -1; // temporary marker
+            queue.push(v);
+            while (!queue.empty()) {
+                auto u = queue.front();
+                queue.pop();
+                component.push_back(u);
+                auto it = m_seed_labels.find(u);
+                if (it != m_seed_labels.end()) {
+                    seeds_in_component.push_back({u, it->second});
+                }
+                for (auto e : out_edge_iterator(u, m_mst)) {
+                    auto neighbor = target(e, m_mst);
+                    auto edge_idx = index(e, m_mst);
+                    if (!m_is_cut[edge_idx] && m_labels(neighbor) != -1) {
+                        m_labels(neighbor) = -1; // temporary marker
+                        queue.push(neighbor);
+                    }
+                }
+            }
+
+            // Step 2: reset component labels to 0
+            for (auto u : component) {
+                m_labels(u) = 0;
+            }
+
+            // Step 3: relabel from remaining seeds in the component
+            for (const auto &seed : seeds_in_component) {
+                auto sv = seed.first;
+                auto sl = seed.second;
+                if (m_labels(sv) != 0) continue;
+                m_labels(sv) = sl;
+                queue.push(sv);
+                while (!queue.empty()) {
+                    auto u = queue.front();
+                    queue.pop();
+                    for (auto e : out_edge_iterator(u, m_mst)) {
+                        auto neighbor = target(e, m_mst);
+                        auto edge_idx = index(e, m_mst);
+                        if (!m_is_cut[edge_idx] && m_labels(neighbor) == 0) {
+                            m_labels(neighbor) = sl;
+                            queue.push(neighbor);
+                        }
+                    }
+                }
+            }
+        }
+
+        tree m_bpt;
+        ugraph m_mst;
+        index_t m_num_leaves;
+        index_t m_root;
+
+        // visitCount array on BPT nodes (Algorithm 1 & 2)
+        std::vector<index_t> m_visit_count;
+
+        // cut state of each MST edge
+        std::vector<bool> m_is_cut;
+
+        // current seeds: vertex -> label
+        std::unordered_map<index_t, index_t> m_seed_labels;
+
+        // cached vertex labeling, updated locally by add_seeds/remove_seeds
+        array_1d<index_t> m_labels;
+    };
+
+    /**
+     * Create an incremental watershed cut object from an edge-weighted graph.
+     *
+     * Builds the canonical BPT and MST, then constructs an
+     * incremental_watershed_cut object.
+     *
+     * @tparam graph_t
+     * @tparam T
+     * @param graph input graph (must be connected)
+     * @param xedge_weights edge weights
+     * @return an incremental_watershed_cut object
+     */
+    template<typename graph_t, typename T>
+    auto make_incremental_watershed_cut(const graph_t &graph,
+                                        const xt::xexpression<T> &xedge_weights) {
+        HG_TRACE();
+        auto &edge_weights = xedge_weights.derived_cast();
+        hg_assert_edge_weights(graph, edge_weights);
+        hg_assert_1d_array(edge_weights);
+
+        auto bpt_res = bpt_canonical(graph, edge_weights);
+        auto mst = subgraph_spanning(graph, bpt_res.mst_edge_map);
+        return incremental_watershed_cut(std::move(bpt_res.tree), std::move(mst));
+    }
+
 }
diff --git a/test/cpp/algo/test_watershed.cpp b/test/cpp/algo/test_watershed.cpp
index 7fcf439f..1f75ffc0 100644
--- a/test/cpp/algo/test_watershed.cpp
+++ b/test/cpp/algo/test_watershed.cpp
@@ -128,4 +128,120 @@ namespace test_watershed {
         REQUIRE((labels == expected));
     }
 
+    TEST_CASE("incremental watershed cut basic", "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({4, 4});
+        array_1d<int> edge_weights{1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1, 5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        // Add seeds matching seeded watershed test 2
+        array_1d<index_t> seed_v1{0, 1, 4};
+        array_1d<index_t> seed_l1{1, 1, 1};
+        iws.add_seeds(seed_v1, seed_l1);
+
+        array_1d<index_t> seed_v2{12, 13};
+        array_1d<index_t> seed_l2{2, 2};
+        iws.add_seeds(seed_v2, seed_l2);
+
+        array_1d<index_t> seed_v3{14, 15};
+        array_1d<index_t> seed_l3{3, 3};
+        iws.add_seeds(seed_v3, seed_l3);
+
+        auto labels = iws.get_labeling();
+
+        // Check that each seed vertex has the correct label
+        REQUIRE(labels(0) == 1);
+        REQUIRE(labels(1) == 1);
+        REQUIRE(labels(4) == 1);
+        REQUIRE(labels(12) == 2);
+        REQUIRE(labels(13) == 2);
+        REQUIRE(labels(14) == 3);
+        REQUIRE(labels(15) == 3);
+
+        // All vertices should be labeled (no background)
+        for (index_t i = 0; i < 16; i++) {
+            REQUIRE(labels(i) != 0);
+        }
+    }
+
+    TEST_CASE("incremental watershed cut remove seed", "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 3});
+        array_1d<int> edge_weights{1, 0, 2, 0, 0, 1, 2};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        array_1d<index_t> sv1{0};
+        array_1d<index_t> sl1{1};
+        iws.add_seeds(sv1, sl1);
+
+        array_1d<index_t> sv2{2};
+        array_1d<index_t> sl2{2};
+        iws.add_seeds(sv2, sl2);
+
+        array_1d<index_t> sv3{4};
+        array_1d<index_t> sl3{3};
+        iws.add_seeds(sv3, sl3);
+
+        // Three seeds => three regions
+        auto labels3 = iws.get_labeling();
+        REQUIRE(labels3(0) == 1);
+        REQUIRE(labels3(2) == 2);
+        REQUIRE(labels3(4) == 3);
+
+        // Remove seed at vertex 4 => two regions remain
+        array_1d<index_t> rm{4};
+        iws.remove_seeds(rm);
+
+        auto labels2 = iws.get_labeling();
+        REQUIRE(labels2(0) == 1);
+        REQUIRE(labels2(2) == 2);
+        // Vertex 4 should now be in one of the remaining regions
+        REQUIRE((labels2(4) == 1 || labels2(4) == 2));
+    }
+
+    TEST_CASE("incremental watershed cut shared labels", "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 3});
+        array_1d<int> edge_weights{1, 0, 2, 0, 0, 1, 2};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        // Two seeds with the same label (label 5) and one with label 7
+        array_1d<index_t> sv{0, 2, 1};
+        array_1d<index_t> sl{5, 5, 7};
+        iws.add_seeds(sv, sl);
+
+        auto labels = iws.get_labeling();
+        REQUIRE(labels(0) == 5);
+        REQUIRE(labels(2) == 5);
+        REQUIRE(labels(1) == 7);
+    }
+
+    TEST_CASE("incremental watershed cut no seeds", "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        auto labels = iws.get_labeling();
+        for (index_t i = 0; i < 4; i++) {
+            REQUIRE(labels(i) == 0);
+        }
+    }
+
+    TEST_CASE("incremental watershed cut single seed", "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        array_1d<index_t> sv{0};
+        array_1d<index_t> sl{1};
+        iws.add_seeds(sv, sl);
+
+        auto labels = iws.get_labeling();
+        for (index_t i = 0; i < 4; i++) {
+            REQUIRE(labels(i) == 1);
+        }
+    }
+
 }
diff --git a/test/python/test_algo/test_watershed.py b/test/python/test_algo/test_watershed.py
index cad2dab5..df76b2de 100644
--- a/test/python/test_algo/test_watershed.py
+++ b/test/python/test_algo/test_watershed.py
@@ -128,5 +128,178 @@ def test_seeded_watershed_seeds_not_in_minima(self):
         self.assertTrue(np.all(labels == expected))
 
 
+class TestIncrementalWatershed(unittest.TestCase):
+
+    def test_basic(self):
+        g = hg.get_4_adjacency_graph((4, 4))
+        edge_weights = np.asarray((1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1, 5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0, 1, 4]), np.array([1, 1, 1]))
+        iws.add_seeds(np.array([12, 13]), np.array([2, 2]))
+        iws.add_seeds(np.array([14, 15]), np.array([3, 3]))
+
+        labels = iws.get_labeling()
+        labels_flat = labels.ravel()
+
+        self.assertEqual(labels_flat[0], 1)
+        self.assertEqual(labels_flat[1], 1)
+        self.assertEqual(labels_flat[4], 1)
+        self.assertEqual(labels_flat[12], 2)
+        self.assertEqual(labels_flat[13], 2)
+        self.assertEqual(labels_flat[14], 3)
+        self.assertEqual(labels_flat[15], 3)
+
+        # All vertices should be labeled
+        self.assertTrue(np.all(labels_flat != 0))
+
+    def test_remove_seed(self):
+        g = hg.get_4_adjacency_graph((2, 3))
+        edge_weights = np.asarray((1, 0, 2, 0, 0, 1, 2))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0]), np.array([1]))
+        iws.add_seeds(np.array([2]), np.array([2]))
+        iws.add_seeds(np.array([4]), np.array([3]))
+
+        labels3 = iws.get_labeling().ravel()
+        self.assertEqual(labels3[0], 1)
+        self.assertEqual(labels3[2], 2)
+        self.assertEqual(labels3[4], 3)
+
+        # Remove seed at vertex 4
+        iws.remove_seeds(np.array([4]))
+
+        labels2 = iws.get_labeling().ravel()
+        self.assertEqual(labels2[0], 1)
+        self.assertEqual(labels2[2], 2)
+        self.assertIn(labels2[4], [1, 2])
+
+    def test_shared_labels(self):
+        g = hg.get_4_adjacency_graph((2, 3))
+        edge_weights = np.asarray((1, 0, 2, 0, 0, 1, 2))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0, 2, 1]), np.array([5, 5, 7]))
+
+        labels = iws.get_labeling().ravel()
+        self.assertEqual(labels[0], 5)
+        self.assertEqual(labels[2], 5)
+        self.assertEqual(labels[1], 7)
+
+    def test_no_seeds(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        labels = iws.get_labeling().ravel()
+        self.assertTrue(np.all(labels == 0))
+
+    def test_single_seed(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0]), np.array([1]))
+
+        labels = iws.get_labeling().ravel()
+        self.assertTrue(np.all(labels == 1))
+
+    def test_add_then_remove_all(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0, 3]), np.array([1, 2]))
+        labels = iws.get_labeling().ravel()
+        self.assertTrue(np.all(labels != 0))
+
+        iws.remove_seeds(np.array([0, 3]))
+        labels = iws.get_labeling().ravel()
+        self.assertTrue(np.all(labels == 0))
+
+    def test_consistency_with_seeded_watershed(self):
+        g = hg.get_4_adjacency_graph((4, 4))
+        edge_weights = np.asarray((1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1, 5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0))
+
+        seeds = np.asarray(((1, 1, 0, 0),
+                            (1, 0, 0, 0),
+                            (0, 0, 0, 0),
+                            (1, 1, 2, 2)))
+
+        expected = hg.labelisation_seeded_watershed(g, edge_weights, seeds)
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+        seed_vertices = np.where(seeds.ravel() != 0)[0]
+        seed_labels = seeds.ravel()[seed_vertices]
+        iws.add_seeds(seed_vertices, seed_labels)
+
+        labels = iws.get_labeling()
+        self.assertTrue(np.all(labels == expected))
+
+    def test_incremental_equals_batch(self):
+        g = hg.get_4_adjacency_graph((4, 4))
+        edge_weights = np.asarray((1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1, 5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0))
+
+        # Add all seeds at once
+        iws_batch = hg.IncrementalWatershedCut(g, edge_weights)
+        iws_batch.add_seeds(np.array([0, 1, 4, 14, 15]), np.array([1, 1, 1, 2, 2]))
+
+        # Add seeds one by one
+        iws_incr = hg.IncrementalWatershedCut(g, edge_weights)
+        iws_incr.add_seeds(np.array([0]), np.array([1]))
+        iws_incr.add_seeds(np.array([1]), np.array([1]))
+        iws_incr.add_seeds(np.array([4]), np.array([1]))
+        iws_incr.add_seeds(np.array([14]), np.array([2]))
+        iws_incr.add_seeds(np.array([15]), np.array([2]))
+
+        self.assertTrue(np.all(iws_batch.get_labeling() == iws_incr.get_labeling()))
+
+    def test_add_remove_readd(self):
+        g = hg.get_4_adjacency_graph((2, 3))
+        edge_weights = np.asarray((1, 0, 2, 0, 0, 1, 2))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        iws.add_seeds(np.array([0, 5]), np.array([1, 2]))
+        labels_first = iws.get_labeling().copy()
+
+        iws.remove_seeds(np.array([5]))
+        iws.add_seeds(np.array([5]), np.array([2]))
+        labels_readd = iws.get_labeling()
+
+        self.assertTrue(np.all(labels_first == labels_readd))
+
+    def test_larger_grid(self):
+        g = hg.get_4_adjacency_graph((10, 10))
+        rng = np.random.RandomState(42)
+        edge_weights = rng.rand(g.num_edges()).astype(np.float64)
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Place seeds at corners
+        iws.add_seeds(np.array([0, 9, 90, 99]), np.array([1, 2, 3, 4]))
+
+        labels = iws.get_labeling().ravel()
+
+        # Each seed vertex has the correct label
+        self.assertEqual(labels[0], 1)
+        self.assertEqual(labels[9], 2)
+        self.assertEqual(labels[90], 3)
+        self.assertEqual(labels[99], 4)
+
+        # All vertices labeled
+        self.assertTrue(np.all(labels != 0))
+
+        # Exactly 4 distinct labels
+        self.assertEqual(len(np.unique(labels)), 4)
+
+
 if __name__ == '__main__':
     unittest.main()

From b1430a74d0c79c0a6bdff3f9b3e4e3300a48068a Mon Sep 17 00:00:00 2001
From: lapertor <raphael.lapertot@gmail.com>
Date: Tue, 12 May 2026 18:24:50 +0200
Subject: [PATCH 2/5] fix(watershed): align remove_seeds with Lebon's
 algorithm, add assertions, optimize BFS

- Fix visitCount batch removal bug: walk-up loop now always decrements
  visitCount and breaks on 2->1 transition (matching Lebon's removeMarker),
  instead of stopping on visitCount==1 without decrementing.
- Add hg_assert in unreachable else-branch of Pass 2a (both sides of
  de-cut with different seed labels should not happen).
- Remove unused #include <unordered_set>.
- Optimize component_seed_label: replace per-call std::vector allocation
  with m_visited generation-counter pattern (zero-cost reset).
- Add trailing newline to watershed.rst.
- Clean test_watershed.py diff (73 insertions, no whitespace changes).
- Add 3 regression tests: batch remove equals sequential, both sides of
  edge, and interactive churn.
- Add C++ test for batch remove sibling subtrees visitCount.

Signed-off-by: lapertor <raphael.lapertot@gmail.com>
---
 doc/source/python/watershed.rst         |   6 +-
 include/higra/algo/watershed.hpp        | 174 +++++++++++++++---------
 test/cpp/algo/test_watershed.cpp        |  96 +++++++++++++
 test/python/test_algo/test_watershed.py |  73 ++++++++++
 4 files changed, 286 insertions(+), 63 deletions(-)

diff --git a/doc/source/python/watershed.rst b/doc/source/python/watershed.rst
index d079884a..6878cc87 100644
--- a/doc/source/python/watershed.rst
+++ b/doc/source/python/watershed.rst
@@ -9,7 +9,11 @@ Labelisation watershed
 
     labelisation_watershed
     labelisation_seeded_watershed
+    IncrementalWatershedCut
 
 .. autofunction:: higra.labelisation_watershed
 
-.. autofunction:: higra.labelisation_seeded_watershed
\ No newline at end of file
+.. autofunction:: higra.labelisation_seeded_watershed
+
+.. autoclass:: higra.IncrementalWatershedCut
+    :members:
diff --git a/include/higra/algo/watershed.hpp b/include/higra/algo/watershed.hpp
index 8be35e53..b550566c 100644
--- a/include/higra/algo/watershed.hpp
+++ b/include/higra/algo/watershed.hpp
@@ -191,6 +191,14 @@ namespace hg {
      * Quentin Lebon, Josselin Lefevre, Jean Cousty, Benjamin Perret.
      * Interactive Segmentation With Incremental Watershed Cuts.
      * CIARP 2023.
+     *
+     * Example:
+     *   auto iws = hg::make_incremental_watershed_cut(graph, edge_weights);
+     *   hg::array_1d<hg::index_t> sv{0, 5};
+     *   hg::array_1d<hg::index_t> sl{1, 2};
+     *   iws.add_seeds(sv, sl);
+     *   auto labels = iws.get_labeling();
+     *   iws.remove_seeds(sv);
      */
     class incremental_watershed_cut {
 
@@ -215,7 +223,8 @@ namespace hg {
                 m_root((index_t)hg::num_vertices(m_bpt) - 1),
                 m_visit_count(hg::num_vertices(m_bpt), 0),
                 m_is_cut(m_num_leaves - 1, false),
-                m_labels(xt::zeros<index_t>({(size_t)m_num_leaves})) {
+                m_labels(xt::zeros<index_t>({(size_t)m_num_leaves})),
+                m_visited(m_num_leaves, 0) {
             HG_TRACE();
             hg_assert((index_t)num_vertices(m_mst) == m_num_leaves,
                       "MST must have the same number of vertices as leaves in the BPT.");
@@ -247,6 +256,7 @@ namespace hg {
             hg_assert(seed_vertices.size() == seed_labels.size(),
                       "seed_vertices and seed_labels must have the same size.");
 
+            // Pass 1: register seeds and update BPT cut state (Algorithm 1, Lebon et al.).
             for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
                 auto v = (index_t)seed_vertices(i);
                 auto l = (index_t)seed_labels(i);
@@ -257,7 +267,6 @@ namespace hg {
 
                 m_seed_labels[v] = l;
 
-                // Algorithm 1 (Lebon et al.): walk up BPT, increment visitCount
                 index_t n = v;
                 while (n != m_root && m_visit_count[n] != 2) {
                     n = m_bpt.parent(n);
@@ -266,8 +275,11 @@ namespace hg {
                         m_is_cut[n - m_num_leaves] = true;
                     }
                 }
-
-                // Local relabeling: BFS from v in its component
+            }
+            // Pass 2: relabel each seed's component once all cuts of the batch are stable.
+            for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
+                auto v = (index_t)seed_vertices(i);
+                auto l = (index_t)seed_labels(i);
                 relabel_component_from_seed(v, l);
             }
         }
@@ -284,6 +296,15 @@ namespace hg {
             auto &seed_vertices = xseed_vertices.derived_cast();
             hg_assert_1d_array(seed_vertices);
 
+            std::vector<index_t> decut_edges;   // MST edge indices that just got un-cut
+            std::vector<index_t> lone_seeds;    // seeds whose walk-up reached the root without a 2->1 transition
+
+            // Pass 1: walk up the BPT for each removed seed, decrement visitCount,
+            // collect the de-cut MST edge indices in walk-up order. The cut state
+            // m_is_cut is NOT updated here; the update is deferred to Pass 2 so
+            // that each de-cut can be processed in isolation (the BFS for de-cut k
+            // sees only the de-cuts processed before k, which bounds the relabeling
+            // to the freshly merged region).
             for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
                 auto v = (index_t)seed_vertices(i);
                 hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
@@ -292,18 +313,70 @@ namespace hg {
 
                 m_seed_labels.erase(v);
 
-                // Algorithm 2 (Lebon et al.): walk up BPT, decrement visitCount
                 index_t n = v;
-                while (n != m_root && m_visit_count[n] != 1) {
+                bool produced_decut = false;
+                while (n != m_root) {
                     n = m_bpt.parent(n);
                     m_visit_count[n] -= 1;
                     if (m_visit_count[n] == 1) {
-                        m_is_cut[n - m_num_leaves] = false;
+                        decut_edges.push_back(n - m_num_leaves);
+                        produced_decut = true;
+                        break;
                     }
                 }
+                if (!produced_decut) {
+                    lone_seeds.push_back(v);
+                }
+            }
+
+            // Pass 2a: for each de-cut MST edge (in collection order), reactivate
+            // the edge in the forest and propagate the surviving label across the
+            // newly merged region. The surviving side is identified from seed
+            // presence in each side component (before edge reactivation), not from
+            // endpoint label values alone.
+            for (auto k : decut_edges) {
+                const auto &e = edge_from_index(k, m_mst);
+                auto u = source(e, m_mst);
+                auto w = target(e, m_mst);
+
+                auto lu_seed = component_seed_label(u);
+                auto lw_seed = component_seed_label(w);
+
+                m_is_cut[k] = false;
+
+                index_t target_label;
+                index_t start_vertex;
+                if (lu_seed != 0 && lw_seed == 0) {
+                    target_label = lu_seed;
+                    start_vertex = w;
+                } else if (lw_seed != 0 && lu_seed == 0) {
+                    target_label = lw_seed;
+                    start_vertex = u;
+                } else if (lu_seed == 0 && lw_seed == 0) {
+                    target_label = 0;
+                    start_vertex = u;
+                } else if (lu_seed == lw_seed) {
+                    target_label = lu_seed;
+                    start_vertex = w;
+                } else {
+                    // Both sides have surviving seeds with different labels.
+                    // This should not happen: a de-cut at BPT node n means visitCount
+                    // transitioned from 2→1, so only one child subtree still has a seed.
+                    // After previous de-cuts in this batch, component_seed_label may find
+                    // seeds from merged regions, but they should have the same label.
+                    hg_assert(false, "Both sides of de-cut edge have different seed labels");
+                    target_label = 0;
+                    start_vertex = u;
+                }
+                relabel_component_from_seed(start_vertex, target_label);
+            }
 
-                // Local relabeling: find merged component and relabel from remaining seeds
-                relabel_merged_component(v);
+            // Pass 2b: a lone seed had no 2->1 transition during its walk-up,
+            // meaning it was the only seed contributing to visitCount along its
+            // entire path to the root. Its component therefore had no other seed
+            // and must now revert to background (0).
+            for (auto v : lone_seeds) {
+                relabel_component_from_seed(v, 0);
             }
         }
 
@@ -322,87 +395,60 @@ namespace hg {
     private:
 
         /**
-         * BFS from seed vertex v, labeling all reachable vertices (not crossing
-         * cut edges) with the given label.
-         *
-         * After add_seeds creates new cuts, v's component is guaranteed to
-         * contain no other seed (the binary tree structure of the BPT ensures
-         * that visitCount == 2 at the LCA of v and any existing seed).
+         * Return a seed label present in the connected component of start,
+         * considering current cut state. Returns 0 if no seed is found.
          */
-        void relabel_component_from_seed(index_t v, index_t label) {
+        index_t component_seed_label(index_t start) {
+            m_visited_generation++;
             std::queue<index_t> queue;
-            m_labels(v) = label;
-            queue.push(v);
+            queue.push(start);
+            m_visited[(size_t)start] = m_visited_generation;
+
             while (!queue.empty()) {
                 auto u = queue.front();
                 queue.pop();
+
+                auto it = m_seed_labels.find(u);
+                if (it != m_seed_labels.end()) {
+                    return it->second;
+                }
+
                 for (auto e : out_edge_iterator(u, m_mst)) {
                     auto neighbor = target(e, m_mst);
                     auto edge_idx = index(e, m_mst);
-                    if (!m_is_cut[edge_idx] && m_labels(neighbor) != label) {
-                        m_labels(neighbor) = label;
+                    if (!m_is_cut[edge_idx] && m_visited[(size_t)neighbor] != m_visited_generation) {
+                        m_visited[(size_t)neighbor] = m_visited_generation;
                         queue.push(neighbor);
                     }
                 }
             }
+            return 0;
         }
 
         /**
-         * After removing a seed at vertex v: find the merged component
-         * (BFS from v respecting current cuts), reset labels to 0, then
-         * relabel from all remaining seeds in the component.
+         * BFS from seed vertex v, labeling all reachable vertices (not crossing
+         * cut edges) with the given label.
+         *
+         * After add_seeds creates new cuts, v's component is guaranteed to
+         * contain no other seed (the binary tree structure of the BPT ensures
+         * that visitCount == 2 at the LCA of v and any existing seed).
          */
-        void relabel_merged_component(index_t v) {
-            // Step 1: BFS from v to find the merged component and collect seeds
+        void relabel_component_from_seed(index_t v, index_t label) {
             std::queue<index_t> queue;
-            std::vector<index_t> component;
-            std::vector<std::pair<index_t, index_t>> seeds_in_component;
-
-            m_labels(v) = -1; // temporary marker
+            m_labels(v) = label;
             queue.push(v);
             while (!queue.empty()) {
                 auto u = queue.front();
                 queue.pop();
-                component.push_back(u);
-                auto it = m_seed_labels.find(u);
-                if (it != m_seed_labels.end()) {
-                    seeds_in_component.push_back({u, it->second});
-                }
                 for (auto e : out_edge_iterator(u, m_mst)) {
                     auto neighbor = target(e, m_mst);
                     auto edge_idx = index(e, m_mst);
-                    if (!m_is_cut[edge_idx] && m_labels(neighbor) != -1) {
-                        m_labels(neighbor) = -1; // temporary marker
+                    if (!m_is_cut[edge_idx] && m_labels(neighbor) != label) {
+                        m_labels(neighbor) = label;
                         queue.push(neighbor);
                     }
                 }
             }
-
-            // Step 2: reset component labels to 0
-            for (auto u : component) {
-                m_labels(u) = 0;
-            }
-
-            // Step 3: relabel from remaining seeds in the component
-            for (const auto &seed : seeds_in_component) {
-                auto sv = seed.first;
-                auto sl = seed.second;
-                if (m_labels(sv) != 0) continue;
-                m_labels(sv) = sl;
-                queue.push(sv);
-                while (!queue.empty()) {
-                    auto u = queue.front();
-                    queue.pop();
-                    for (auto e : out_edge_iterator(u, m_mst)) {
-                        auto neighbor = target(e, m_mst);
-                        auto edge_idx = index(e, m_mst);
-                        if (!m_is_cut[edge_idx] && m_labels(neighbor) == 0) {
-                            m_labels(neighbor) = sl;
-                            queue.push(neighbor);
-                        }
-                    }
-                }
-            }
         }
 
         tree m_bpt;
@@ -421,6 +467,10 @@ namespace hg {
 
         // cached vertex labeling, updated locally by add_seeds/remove_seeds
         array_1d<index_t> m_labels;
+
+        // BFS visited buffer with generation counter (zero-cost reset pattern)
+        index_t m_visited_generation = 0;
+        std::vector<index_t> m_visited;
     };
 
     /**
diff --git a/test/cpp/algo/test_watershed.cpp b/test/cpp/algo/test_watershed.cpp
index 1f75ffc0..a97f4788 100644
--- a/test/cpp/algo/test_watershed.cpp
+++ b/test/cpp/algo/test_watershed.cpp
@@ -244,4 +244,100 @@ namespace test_watershed {
         }
     }
 
+    TEST_CASE("incremental watershed cut batch remove equals sequential",
+              "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({4, 4});
+        array_1d<int> edge_weights{1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1,
+                                   5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0};
+
+        array_1d<index_t> sv{0, 1, 4, 14, 15};
+        array_1d<index_t> sl{1, 1, 1, 2, 2};
+
+        auto a = hg::make_incremental_watershed_cut(g, edge_weights);
+        a.add_seeds(sv, sl);
+        array_1d<index_t> rm{1, 14};
+        a.remove_seeds(rm);
+
+        auto b = hg::make_incremental_watershed_cut(g, edge_weights);
+        b.add_seeds(sv, sl);
+        array_1d<index_t> rm1{1};
+        array_1d<index_t> rm2{14};
+        b.remove_seeds(rm1);
+        b.remove_seeds(rm2);
+
+        REQUIRE((a.get_labeling() == b.get_labeling()));
+    }
+
+    TEST_CASE("incremental watershed cut batch remove both sides of edge",
+              "[incremental_watershed_cut]") {
+        // Two seeds -> exactly one cut edge; remove both at once -> background everywhere.
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> sv{0, 3};
+        array_1d<index_t> sl{1, 2};
+        iws.add_seeds(sv, sl);
+        iws.remove_seeds(sv);
+
+        const auto &labels = iws.get_labeling();
+        for (index_t i = 0; i < 4; i++) {
+            REQUIRE(labels(i) == 0);
+        }
+    }
+
+    TEST_CASE("incremental watershed cut interactive churn",
+              "[incremental_watershed_cut]") {
+        // Regression test for scenario 07: interactive single seed churn with label changes.
+        auto g = hg::get_4_adjacency_graph({100, 100});
+        const index_t n_edges = g.num_edges();
+        array_1d<double> edge_weights = xt::empty<double>({n_edges});
+        for (index_t i = 0; i < n_edges; ++i) {
+            // Deterministic pseudo-random in [0, 1)
+            edge_weights(i) = static_cast<double>(i % 997) / 997.0;
+        }
+
+        // Deterministic vertices matching scenario 07
+        array_1d<index_t> sv{4098, 8671, 7466, 737, 5621,
+                             5954, 3197, 7184, 7657, 3043};
+        array_1d<index_t> initial_labels{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+        const index_t changed_label = 99;
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+
+        // Add all 10 seeds
+        iws.add_seeds(sv, initial_labels);
+        auto labels = iws.get_labeling();
+
+        // Baseline via full seeded watershed
+        array_1d<index_t> seeds = xt::zeros<index_t>({g.num_vertices()});
+        for (index_t i = 0; i < 10; ++i) {
+            seeds(sv(i)) = initial_labels(i);
+        }
+        auto baseline = hg::labelisation_seeded_watershed(g, edge_weights, seeds);
+        REQUIRE((labels == baseline));
+
+        // Churn first 5 seeds: remove then re-add with changed label
+        for (index_t i = 0; i < 5; ++i) {
+            index_t v = sv(i);
+            array_1d<index_t> rm{v};
+            array_1d<index_t> add_v{v};
+            array_1d<index_t> add_l{changed_label};
+
+            // Remove
+            iws.remove_seeds(rm);
+            seeds(v) = 0;
+            labels = iws.get_labeling();
+            baseline = hg::labelisation_seeded_watershed(g, edge_weights, seeds);
+            REQUIRE((labels == baseline));
+
+            // Re-add with changed label
+            iws.add_seeds(add_v, add_l);
+            seeds(v) = changed_label;
+            labels = iws.get_labeling();
+            baseline = hg::labelisation_seeded_watershed(g, edge_weights, seeds);
+            REQUIRE((labels == baseline));
+        }
+    }
+
 }
diff --git a/test/python/test_algo/test_watershed.py b/test/python/test_algo/test_watershed.py
index df76b2de..d59750a7 100644
--- a/test/python/test_algo/test_watershed.py
+++ b/test/python/test_algo/test_watershed.py
@@ -300,6 +300,79 @@ def test_larger_grid(self):
         # Exactly 4 distinct labels
         self.assertEqual(len(np.unique(labels)), 4)
 
+    def test_batch_remove_equals_sequential(self):
+        g = hg.get_4_adjacency_graph((4, 4))
+        edge_weights = np.asarray((1, 2, 5, 5, 4, 8, 1, 4, 3, 4, 4, 1,
+                                   5, 2, 6, 2, 5, 2, 0, 7, 0, 3, 4, 0))
+        sv = np.array([0, 1, 4, 14, 15])
+        sl = np.array([1, 1, 1, 2, 2])
+
+        a = hg.IncrementalWatershedCut(g, edge_weights)
+        a.add_seeds(sv, sl)
+        a.remove_seeds(np.array([1, 14]))
+
+        b = hg.IncrementalWatershedCut(g, edge_weights)
+        b.add_seeds(sv, sl)
+        b.remove_seeds(np.array([1]))
+        b.remove_seeds(np.array([14]))
+
+        self.assertTrue(np.all(a.get_labeling() == b.get_labeling()))
+
+    def test_batch_remove_both_sides_of_edge(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+        iws.add_seeds(np.array([0, 3]), np.array([1, 2]))
+        iws.remove_seeds(np.array([0, 3]))
+        self.assertTrue(np.all(iws.get_labeling().ravel() == 0))
+
+    def test_interactive_churn(self):
+        """
+        Regression test for scenario 07: interactive single seed churn with label changes.
+        Adds 10 seeds on a 100x100 grid, then alternately removes and re-adds 5 of them
+        with changed labels, verifying consistency with labelisation_seeded_watershed
+        at each step.
+        """
+        g = hg.get_4_adjacency_graph((100, 100))
+        rng = np.random.RandomState(42)
+        edge_weights = rng.rand(g.num_edges()).astype(np.float64)
+
+        # Deterministic vertices matching scenario 07
+        sv = np.array([4098, 8671, 7466, 737, 5621,
+                       5954, 3197, 7184, 7657, 3043], dtype=np.int64)
+        initial_labels = np.arange(1, 11, dtype=np.int64)
+        changed_label = 99
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Add all 10 seeds
+        iws.add_seeds(sv, initial_labels)
+        labels = iws.get_labeling().ravel()
+
+        # Baseline via full seeded watershed
+        seeds = np.zeros(g.num_vertices(), dtype=np.int64)
+        seeds[sv] = initial_labels
+        baseline = hg.labelisation_seeded_watershed(g, edge_weights, seeds.reshape(100, 100))
+        self.assertTrue(np.all(labels == baseline.ravel()))
+
+        # Churn: remove and re-add first 5 seeds with label 99
+        for v in sv[:5]:
+            # Remove
+            iws.remove_seeds(np.array([v]))
+            seeds[v] = 0
+            labels = iws.get_labeling().ravel()
+            baseline = hg.labelisation_seeded_watershed(
+                g, edge_weights, seeds.reshape(100, 100))
+            self.assertTrue(np.all(labels == baseline.ravel()))
+
+            # Re-add with changed label
+            iws.add_seeds(np.array([v]), np.array([changed_label]))
+            seeds[v] = changed_label
+            labels = iws.get_labeling().ravel()
+            baseline = hg.labelisation_seeded_watershed(
+                g, edge_weights, seeds.reshape(100, 100))
+            self.assertTrue(np.all(labels == baseline.ravel()))
+
 
 if __name__ == '__main__':
     unittest.main()

From fc042fa2b98b5e8184c306c63f0bfc451f2837a5 Mon Sep 17 00:00:00 2001
From: lapertor <raphael.lapertot@gmail.com>
Date: Wed, 13 May 2026 14:41:36 +0200
Subject: [PATCH 3/5] doc(watershed): document complexity, clarify Pass 2a,
 extend tests

- Add a :Complexity: section to the IncrementalWatershedCut Python
  docstring and to the Doxygen block of the C++ class. It breaks down
  construction (O(n log n), dominated by the bpt_canonical edge sort),
  add_seeds (O(K * d + S) with S <= num_vertices, by the visitCount==2
  disjointness invariant on the K new MST-forest components),
  remove_seeds (O(K * d + S') with S' <= num_vertices for batches whose
  de-cuts touch mostly disjoint regions, and up to O(K * num_vertices)
  in the worst case of cascading merges within the same batch), and
  get_labeling (O(1), labeling maintained incrementally).
- Document the no-relabeling restriction in add_seeds: a vertex that
  is already a seed must be removed first if its label must change.
- Rewrite the Pass 1 comment in remove_seeds: the de-cut collection
  order is the batch insertion order, not a walk-up order. Correctness
  of the resulting labeling does not depend on the relative BPT depth
  of the de-cuts within the batch.
- Strengthen the rationale for the unreachable else branch in Pass 2a:
  cite the 2->1 visitCount transition invariant and Lebon Algorithm 2;
  the release-mode fallback now reads as an explicit safety net.
- Add three regression tests (Python and C++):
    - decuts_ancestor_descendant: 1x8 path with controlled weights so
      a batch remove produces de-cuts at different BPT depths (one
      node is the descendant of the other). Compared to the full
      labelisation_seeded_watershed for every result.
    - add_seed_already_exists: assertion is raised on duplicate seed.
    - add_seed_label_zero: assertion is raised on label == 0.

No behavioral change on the public API.

Signed-off-by: lapertor <raphael.lapertot@gmail.com>
---
 higra/algo/watershed.py                 | 34 +++++++++++++++++
 include/higra/algo/watershed.hpp        | 46 ++++++++++++++++++-----
 test/cpp/algo/test_watershed.cpp        | 50 +++++++++++++++++++++++++
 test/python/test_algo/test_watershed.py | 46 +++++++++++++++++++++++
 4 files changed, 167 insertions(+), 9 deletions(-)

diff --git a/higra/algo/watershed.py b/higra/algo/watershed.py
index 18e0dc79..47901190 100644
--- a/higra/algo/watershed.py
+++ b/higra/algo/watershed.py
@@ -87,6 +87,31 @@ class IncrementalWatershedCut:
     The algorithm maintains a canonical BPT and a visitCount array to identify
     watershed edges. The labeling is obtained by BFS on the MST forest.
 
+    :Complexity:
+
+    Construction is dominated by the canonical BPT and MST construction in
+    :math:`\\mathcal{O}(n \\log n)` with :math:`n` the number of edges in the
+    input graph (dominated by the edge sort).
+
+    :func:`add_seeds` on a batch of :math:`K` seeds runs in
+    :math:`\\mathcal{O}(K \\cdot d + S)` where :math:`d` is the height of the
+    BPT (:math:`\\mathcal{O}(\\log N)` for balanced trees, :math:`\\mathcal{O}(N)`
+    worst case, with :math:`N` the number of vertices) and :math:`S` is the
+    total size of the :math:`K` MST-forest components relabeled in the second
+    pass. By the :math:`\\text{visit\\_count} == 2` invariant these :math:`K`
+    components are pairwise disjoint, hence :math:`S \\leq N`.
+
+    :func:`remove_seeds` on a batch of :math:`K` seeds runs in
+    :math:`\\mathcal{O}(K \\cdot d + S')` where :math:`d` is as above and
+    :math:`S'` is the total work performed by the BFS calls during the de-cut
+    and relabel phase. When the de-cuts of the batch touch mostly disjoint
+    regions, :math:`S' \\leq N`; in the worst case of cascading merges within
+    the same batch (each removal extends a growing super-component),
+    :math:`S'` can grow to :math:`\\mathcal{O}(K \\cdot N)`.
+
+    :func:`get_labeling` is :math:`\\mathcal{O}(1)` (the labeling is
+    maintained incrementally).
+
     Reference:
 
         Q. Lebon, J. Lefevre, J. Cousty, B. Perret.
@@ -112,11 +137,18 @@ def add_seeds(self, seed_vertices, seed_labels):
         in the output labeling). Labels must be non-zero (0 is reserved for
         unlabeled/background vertices).
 
+        Re-adding a seed on a vertex that is already a seed (regardless of the
+        label) raises an exception. To change the label of an existing seed,
+        call :func:`remove_seeds` first and then :func:`add_seeds` with the new
+        label.
+
         :param seed_vertices: 1d array of seed vertex indices
         :param seed_labels: 1d array of seed labels (same size as seed_vertices)
         """
         seed_vertices = np.asarray(seed_vertices, dtype=np.int64).ravel()
         seed_labels = np.asarray(seed_labels, dtype=np.int64).ravel()
+        if seed_vertices.size != seed_labels.size:
+            raise ValueError("seed_vertices and seed_labels must have the same size.")
         self._impl._add_seeds(seed_vertices, seed_labels)
 
     def remove_seeds(self, seed_vertices):
@@ -126,6 +158,8 @@ def remove_seeds(self, seed_vertices):
         :param seed_vertices: 1d array of seed vertex indices to remove
         """
         seed_vertices = np.asarray(seed_vertices, dtype=np.int64).ravel()
+        if seed_vertices.size == 0:
+            return
         self._impl._remove_seeds(seed_vertices)
 
     def get_labeling(self):
diff --git a/include/higra/algo/watershed.hpp b/include/higra/algo/watershed.hpp
index b550566c..b85531de 100644
--- a/include/higra/algo/watershed.hpp
+++ b/include/higra/algo/watershed.hpp
@@ -187,6 +187,24 @@ namespace hg {
      * watershed edges without recomputing from scratch at each interaction.
      * The labeling is cached and updated locally when seeds change.
      *
+     * Complexity:
+     *   Construction:    O(n log n) with n the number of edges in the input
+     *                    graph (dominated by the sort in bpt_canonical).
+     *   add_seeds(K):    O(K * d + S) where d is the height of the BPT
+     *                    (O(log N) for balanced trees, O(N) worst case, with
+     *                    N the number of vertices) and S is the total size of
+     *                    the K MST-forest components relabeled in pass 2.
+     *                    By the visit_count==2 invariant these K components
+     *                    are pairwise disjoint, hence S <= N.
+     *   remove_seeds(K): O(K * d + S') where d is as above and S' is the
+     *                    total work performed by the BFS calls of pass 2a
+     *                    and 2b. When the de-cuts of the batch touch mostly
+     *                    disjoint regions, S' <= N; in the worst case of
+     *                    cascading merges within the same batch (each removal
+     *                    extends a growing super-component), S' can grow to
+     *                    O(K * N).
+     *   get_labeling:    O(1) (the labeling is maintained incrementally).
+     *
      * Reference:
      * Quentin Lebon, Josselin Lefevre, Jean Cousty, Benjamin Perret.
      * Interactive Segmentation With Incremental Watershed Cuts.
@@ -300,11 +318,15 @@ namespace hg {
             std::vector<index_t> lone_seeds;    // seeds whose walk-up reached the root without a 2->1 transition
 
             // Pass 1: walk up the BPT for each removed seed, decrement visitCount,
-            // collect the de-cut MST edge indices in walk-up order. The cut state
-            // m_is_cut is NOT updated here; the update is deferred to Pass 2 so
-            // that each de-cut can be processed in isolation (the BFS for de-cut k
-            // sees only the de-cuts processed before k, which bounds the relabeling
-            // to the freshly merged region).
+            // collect the de-cut MST edge indices in batch insertion order (i.e.
+            // the order seeds appear in the input array; each seed produces at
+            // most one de-cut, at the first 2->1 transition along its walk-up).
+            // The cut state m_is_cut is NOT updated here; the update is deferred
+            // to Pass 2 so that each de-cut can be processed in isolation (the
+            // BFS for de-cut k sees only the de-cuts processed before k, which
+            // bounds the relabeling to the freshly merged region). The
+            // correctness of the resulting labeling does not depend on the
+            // relative BPT depth of the de-cuts within the batch.
             for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
                 auto v = (index_t)seed_vertices(i);
                 hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
@@ -360,10 +382,16 @@ namespace hg {
                     start_vertex = w;
                 } else {
                     // Both sides have surviving seeds with different labels.
-                    // This should not happen: a de-cut at BPT node n means visitCount
-                    // transitioned from 2→1, so only one child subtree still has a seed.
-                    // After previous de-cuts in this batch, component_seed_label may find
-                    // seeds from merged regions, but they should have the same label.
+                    // This branch is unreachable: a de-cut at BPT node n corresponds
+                    // to a visitCount(n): 2 -> 1 transition, so exactly one of the
+                    // two BPT subtrees of n lost its last active seed; the other
+                    // still has at least one. Earlier de-cuts in this batch can
+                    // extend a side's MST-forest component beyond a single BPT
+                    // subtree, but every active seed reachable from a given side
+                    // of edge k shares one label by induction on the 2->1
+                    // invariant (Lebon et al., Algorithm 2). The release-mode
+                    // fallback below avoids leaving the labeling in an
+                    // inconsistent state if the invariant ever breaks.
                     hg_assert(false, "Both sides of de-cut edge have different seed labels");
                     target_label = 0;
                     start_vertex = u;
diff --git a/test/cpp/algo/test_watershed.cpp b/test/cpp/algo/test_watershed.cpp
index a97f4788..150db0ef 100644
--- a/test/cpp/algo/test_watershed.cpp
+++ b/test/cpp/algo/test_watershed.cpp
@@ -340,4 +340,54 @@ namespace test_watershed {
         }
     }
 
+    TEST_CASE("incremental watershed cut decuts ancestor descendant",
+              "[incremental_watershed_cut]") {
+        // Path graph 1x8 with weights producing a balanced canonical BPT.
+        // Seeds at 0, 1, 3, 4 then batch remove of 0, 3 produces two de-cuts
+        // where one BPT node is a descendant of the other. Compare to the
+        // full seeded watershed to verify Pass 2a handles cross-level de-cuts.
+        auto g = hg::get_4_adjacency_graph({1, 8});
+        array_1d<double> edge_weights{1, 5, 2, 7, 3, 6, 4};
+
+        array_1d<index_t> sv{0, 1, 3, 4};
+        array_1d<index_t> sl{10, 20, 30, 40};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        iws.add_seeds(sv, sl);
+        array_1d<index_t> rm{0, 3};
+        iws.remove_seeds(rm);
+
+        array_1d<index_t> seeds = xt::zeros<index_t>({g.num_vertices()});
+        seeds(1) = 20;
+        seeds(4) = 40;
+        auto expected = hg::labelisation_seeded_watershed(g, edge_weights, seeds);
+        REQUIRE((iws.get_labeling() == expected));
+    }
+
+    TEST_CASE("incremental watershed cut add duplicate seed throws",
+              "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> sv{0};
+        array_1d<index_t> sl{1};
+        iws.add_seeds(sv, sl);
+
+        array_1d<index_t> sv2{0};
+        array_1d<index_t> sl2{2};
+        REQUIRE_THROWS(iws.add_seeds(sv2, sl2));
+    }
+
+    TEST_CASE("incremental watershed cut add seed label zero throws",
+              "[incremental_watershed_cut]") {
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> sv{0};
+        array_1d<index_t> sl{0};
+        REQUIRE_THROWS(iws.add_seeds(sv, sl));
+    }
+
 }
diff --git a/test/python/test_algo/test_watershed.py b/test/python/test_algo/test_watershed.py
index d59750a7..28b5abf2 100644
--- a/test/python/test_algo/test_watershed.py
+++ b/test/python/test_algo/test_watershed.py
@@ -373,6 +373,52 @@ def test_interactive_churn(self):
                 g, edge_weights, seeds.reshape(100, 100))
             self.assertTrue(np.all(labels == baseline.ravel()))
 
+    def test_decuts_ancestor_descendant(self):
+        """
+        Batch remove producing two de-cuts where one BPT node is the ancestor
+        of the other. Exercises Pass 2a across BPT levels.
+
+        Path graph 1x8 with edge weights chosen so the canonical BPT is the
+        balanced binary tree (e_01, e_23, e_45, e_67 fuse first; then e_12,
+        e_56; then e_34 at the root). Seeds at 0, 1, 3, 4 create cuts at
+        p_01 (depth 2), p_0123 (depth 1) and root (depth 1 of right side).
+        Removing 0 and 3 in one batch triggers de-cuts at p_01 and p_0123,
+        with p_01 a descendant of p_0123 in the BPT.
+        """
+        g = hg.get_4_adjacency_graph((1, 8))
+        edge_weights = np.asarray([1, 5, 2, 7, 3, 6, 4], dtype=np.float64)
+
+        sv = np.array([0, 1, 3, 4], dtype=np.int64)
+        sl = np.array([10, 20, 30, 40], dtype=np.int64)
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+        iws.add_seeds(sv, sl)
+        iws.remove_seeds(np.array([0, 3], dtype=np.int64))
+
+        seeds = np.zeros(g.num_vertices(), dtype=np.int64)
+        seeds[1] = 20
+        seeds[4] = 40
+        expected = hg.labelisation_seeded_watershed(
+            g, edge_weights, seeds.reshape(1, 8))
+        self.assertTrue(np.all(iws.get_labeling() == expected))
+
+    def test_add_seed_already_exists_raises(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+        iws.add_seeds(np.array([0]), np.array([1]))
+        with self.assertRaises(Exception):
+            iws.add_seeds(np.array([0]), np.array([2]))
+
+    def test_add_seed_label_zero_raises(self):
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+        with self.assertRaises(Exception):
+            iws.add_seeds(np.array([0]), np.array([0]))
+
 
 if __name__ == '__main__':
     unittest.main()

From 1ae5debbe3501479c7aecf66d2a77be38420d5e0 Mon Sep 17 00:00:00 2001
From: lapertor <raphael.lapertot@gmail.com>
Date: Wed, 13 May 2026 22:06:36 +0200
Subject: [PATCH 4/5] test(watershed): add regression tests for batch partial
 mutation

Add regression tests for add_seeds and remove_seeds batch duplicate
detection. Tests verify that invalid batches raise exceptions WITHOUT
leaving the object in a partially mutated state.

Python tests:
- test_add_seeds_batch_duplicate_no_partial_mutation
- test_add_seeds_cross_batch_duplicate_no_partial_mutation
- test_remove_seeds_batch_duplicate_no_partial_mutation
- test_remove_seeds_cross_batch_duplicate_no_partial_mutation

C++ tests:
- incremental watershed cut add seeds batch duplicate throws
- incremental watershed cut add seeds cross batch duplicate throws
- incremental watershed cut remove seeds batch duplicate throws
- incremental watershed cut remove seeds non existent in batch throws

Signed-off-by: Raphael Lapertot <raphael.lapertot@gmail.com>
---
 test/cpp/algo/test_watershed.cpp        |  81 +++++++++++++++++++
 test/python/test_algo/test_watershed.py | 103 ++++++++++++++++++++++++
 2 files changed, 184 insertions(+)

diff --git a/test/cpp/algo/test_watershed.cpp b/test/cpp/algo/test_watershed.cpp
index 150db0ef..7a573771 100644
--- a/test/cpp/algo/test_watershed.cpp
+++ b/test/cpp/algo/test_watershed.cpp
@@ -390,4 +390,85 @@ namespace test_watershed {
         REQUIRE_THROWS(iws.add_seeds(sv, sl));
     }
 
+    TEST_CASE("incremental watershed cut add seeds batch duplicate throws",
+              "[incremental_watershed_cut]") {
+        // add_seeds with duplicate vertices in batch must throw without partial mutation.
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> sv1{0, 0};
+        array_1d<index_t> sl1{1, 2};
+        REQUIRE_THROWS(iws.add_seeds(sv1, sl1));
+
+        // Post-exception: object must be in valid state; adding seed 0 with label 1 must succeed.
+        array_1d<index_t> sv2{0};
+        array_1d<index_t> sl2{1};
+        iws.add_seeds(sv2, sl2);
+        REQUIRE(iws.get_labeling()(0) == 1);
+    }
+
+    TEST_CASE("incremental watershed cut add seeds cross batch duplicate throws",
+              "[incremental_watershed_cut]") {
+        // add_seeds duplicate of a previously added seed must throw without partial mutation.
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        // Add seed 0 with label 1
+        array_1d<index_t> sv1{0};
+        array_1d<index_t> sl1{1};
+        iws.add_seeds(sv1, sl1);
+
+        // Attempt to add seed 0 again with different label in batch {0, 3} -> must throw.
+        array_1d<index_t> sv2{0, 3};
+        array_1d<index_t> sl2{2, 2};
+        REQUIRE_THROWS(iws.add_seeds(sv2, sl2));
+
+        // Post-exception: seed 0 must still have label 1 (no partial mutation).
+        REQUIRE(iws.get_labeling()(0) == 1);
+    }
+
+    TEST_CASE("incremental watershed cut remove seeds batch duplicate throws",
+              "[incremental_watershed_cut]") {
+        // remove_seeds with duplicate vertices in batch must throw without partial mutation.
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> add_sv{0, 3};
+        array_1d<index_t> add_sl{1, 2};
+        iws.add_seeds(add_sv, add_sl);
+
+        // Attempt batch remove with duplicate {0, 0} -> must throw.
+        array_1d<index_t> rm{0, 0};
+        REQUIRE_THROWS(iws.remove_seeds(rm));
+
+        // Post-exception: seed 0 must still exist; removing it singly must succeed.
+        REQUIRE(iws.get_labeling()(0) == 1);
+        array_1d<index_t> rm2{0};
+        iws.remove_seeds(rm2);
+        // After removing seed 0, vertex 0 merges into seed 3's component (label 2).
+        REQUIRE(iws.get_labeling()(0) == 2);
+    }
+
+    TEST_CASE("incremental watershed cut remove seeds non existent in batch throws",
+              "[incremental_watershed_cut]") {
+        // remove_seeds where some vertices are not seeds must throw without partial mutation.
+        auto g = hg::get_4_adjacency_graph({2, 2});
+        array_1d<int> edge_weights{1, 2, 3, 4};
+
+        auto iws = hg::make_incremental_watershed_cut(g, edge_weights);
+        array_1d<index_t> add_sv{0};
+        array_1d<index_t> add_sl{1};
+        iws.add_seeds(add_sv, add_sl);
+
+        // Attempt to remove {3, 0} where 3 is not a seed -> must throw.
+        array_1d<index_t> rm{3, 0};
+        REQUIRE_THROWS(iws.remove_seeds(rm));
+
+        // Post-exception: seed 0 must still be present and labeled correctly.
+        REQUIRE(iws.get_labeling()(0) == 1);
+    }
+
 }
diff --git a/test/python/test_algo/test_watershed.py b/test/python/test_algo/test_watershed.py
index 28b5abf2..bb031a0c 100644
--- a/test/python/test_algo/test_watershed.py
+++ b/test/python/test_algo/test_watershed.py
@@ -419,6 +419,109 @@ def test_add_seed_label_zero_raises(self):
         with self.assertRaises(Exception):
             iws.add_seeds(np.array([0]), np.array([0]))
 
+    def test_add_seeds_batch_duplicate_no_partial_mutation(self):
+        """
+        RED regression test: batch add with duplicate vertex [0, 0].
+        Verifies no partial mutation occurs when an exception is raised
+        mid-batch. After the exception, adding seed 0 with label 1 again
+        must succeed, proving vertex 0 was not pre-registered.
+        """
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Batch contains duplicate vertex 0 - must raise
+        with self.assertRaises(Exception):
+            iws.add_seeds(np.array([0, 0]), np.array([1, 2]))
+
+        # After exception, seed 0 with label 1 must be addable (no partial mutation)
+        iws.add_seeds(np.array([0]), np.array([1]))
+        labels = iws.get_labeling()
+        self.assertEqual(labels.flat[0], 1)
+
+    def test_add_seeds_cross_batch_duplicate_no_partial_mutation(self):
+        """
+        RED regression test: cross-batch duplicate detection.
+        First adds seed 0 with label 1, then tries to add [0, 3] with labels [2, 2].
+        The duplicate on vertex 0 must raise without affecting:
+        - seed 0 retaining label 1 (verified via get_labeling)
+        - vertex 3 not being pre-registered (verified by successful re-add)
+        """
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Seed vertex 0 with label 1
+        iws.add_seeds(np.array([0]), np.array([1]))
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+
+        # Batch contains vertex 0 again (duplicate) - must raise
+        with self.assertRaises(Exception):
+            iws.add_seeds(np.array([0, 3]), np.array([2, 2]))
+
+        # After exception, vertex 0 must still have label 1 (no partial mutation)
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+
+        # Vertex 3 must not be pre-registered, so adding it with label 2 must succeed
+        iws.add_seeds(np.array([3]), np.array([2]))
+        labels = iws.get_labeling()
+        self.assertEqual(labels.flat[3], 2)
+
+    def test_remove_seeds_batch_duplicate_no_partial_mutation(self):
+        """
+        RED regression test: batch remove with duplicate vertex [0, 0].
+        Verifies no partial mutation occurs when an exception is raised
+        mid-batch. After the exception, seed 0 and seed 3 must retain their labels.
+        """
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Seed vertices 0 and 3 with labels 1 and 2
+        iws.add_seeds(np.array([0, 3]), np.array([1, 2]))
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+        self.assertEqual(iws.get_labeling().flat[3], 2)
+
+        # Batch contains duplicate vertex 0 - must raise
+        with self.assertRaises(Exception):
+            iws.remove_seeds(np.array([0, 0]))
+
+        # After exception, seed 0 and seed 3 must retain their labels (no partial mutation)
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+        self.assertEqual(iws.get_labeling().flat[3], 2)
+
+    def test_remove_seeds_cross_batch_duplicate_no_partial_mutation(self):
+        """
+        RED regression test: cross-batch duplicate detection.
+        First adds seeds [0, 3] with labels [1, 2], then tries to remove [0, 1]
+        where 1 is not a seed. The duplicate on vertex 1 (not a seed) must raise
+        without affecting seed 0 (label 1) or seed 3 (label 2).
+        """
+        g = hg.get_4_adjacency_graph((2, 2))
+        edge_weights = np.asarray((1, 2, 3, 4))
+
+        iws = hg.IncrementalWatershedCut(g, edge_weights)
+
+        # Seed vertices 0 and 3 with labels 1 and 2
+        iws.add_seeds(np.array([0, 3]), np.array([1, 2]))
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+        self.assertEqual(iws.get_labeling().flat[3], 2)
+
+        # Batch contains vertex 1 which is not a seed - must raise
+        with self.assertRaises(Exception):
+            iws.remove_seeds(np.array([0, 1]))
+
+        # After exception, seed 0 and seed 3 must retain their labels (no partial mutation)
+        self.assertEqual(iws.get_labeling().flat[0], 1)
+        self.assertEqual(iws.get_labeling().flat[3], 2)
+
+        # Removing just seed 0 must succeed
+        iws.remove_seeds(np.array([0]))
+        self.assertEqual(iws.get_labeling().flat[3], 2)
+
 
 if __name__ == '__main__':
     unittest.main()

From 1c477a6fa506e95fda89d8086d8c6a3cbfe6410c Mon Sep 17 00:00:00 2001
From: lapertor <raphael.lapertot@gmail.com>
Date: Wed, 13 May 2026 22:06:53 +0200
Subject: [PATCH 5/5] fix(watershed): pre-validate batches in add_seeds and
 remove_seeds

Add Pass 0 pre-validation to IncrementalWatershedCut::add_seeds and
::remove_seeds to prevent partial batch mutation.

Before this fix, invalid elements in a batch (duplicate vertices,
out-of-range indices, non-existent seeds) would trigger an exception
after some elements had already mutated internal state, leaving the
object in an inconsistent state.

Pass 0 validates the entire batch BEFORE any mutation:
- add_seeds: checks vertex range, label != 0, intra-batch duplicates,
  and cross-batch duplicates (already a seed)
- remove_seeds: checks vertex range, intra-batch duplicates, and
  seed existence

Uses std::unordered_set<index_t> for O(1) intra-batch duplicate
detection. All checks use hg_assert (consistent with existing code,
no-op in Release builds).

No changes to Pass 1/2/2a/2b loop bodies. No rollback/transaction
semantics introduced.

Fixes: PR299 review finding (Medium severity)

Signed-off-by: Raphael Lapertot <raphael.lapertot@gmail.com>
---
 include/higra/algo/watershed.hpp | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/include/higra/algo/watershed.hpp b/include/higra/algo/watershed.hpp
index b85531de..7a7ff0ce 100644
--- a/include/higra/algo/watershed.hpp
+++ b/include/higra/algo/watershed.hpp
@@ -21,6 +21,7 @@
 #include <stack>
 #include <queue>
 #include <unordered_map>
+#include <unordered_set>
 
 namespace hg {
 
@@ -274,6 +275,20 @@ namespace hg {
             hg_assert(seed_vertices.size() == seed_labels.size(),
                       "seed_vertices and seed_labels must have the same size.");
 
+            // Pass 0: pre-validation -- reject entire batch if any element is invalid.
+            {
+                std::unordered_set<index_t> batch_vertices;
+                for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
+                    auto v = (index_t)seed_vertices(i);
+                    auto l = (index_t)seed_labels(i);
+                    hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
+                    hg_assert(l != 0, "Seed label must be non-zero (0 is reserved for background).");
+                    hg_assert(batch_vertices.find(v) == batch_vertices.end(), "Duplicate vertex in batch.");
+                    batch_vertices.insert(v);
+                    hg_assert(m_seed_labels.find(v) == m_seed_labels.end(), "Vertex is already a seed.");
+                }
+            }
+
             // Pass 1: register seeds and update BPT cut state (Algorithm 1, Lebon et al.).
             for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
                 auto v = (index_t)seed_vertices(i);
@@ -314,6 +329,18 @@ namespace hg {
             auto &seed_vertices = xseed_vertices.derived_cast();
             hg_assert_1d_array(seed_vertices);
 
+            // Pass 0: pre-validation -- reject entire batch if any element is invalid.
+            {
+                std::unordered_set<index_t> batch_vertices;
+                for (index_t i = 0; i < (index_t)seed_vertices.size(); i++) {
+                    auto v = (index_t)seed_vertices(i);
+                    hg_assert(v >= 0 && v < m_num_leaves, "Seed vertex out of range.");
+                    hg_assert(batch_vertices.find(v) == batch_vertices.end(), "Duplicate vertex in batch.");
+                    batch_vertices.insert(v);
+                    hg_assert(m_seed_labels.find(v) != m_seed_labels.end(), "Vertex is not a seed.");
+                }
+            }
+
             std::vector<index_t> decut_edges;   // MST edge indices that just got un-cut
             std::vector<index_t> lone_seeds;    // seeds whose walk-up reached the root without a 2->1 transition