From 7cd683bd925b57304174fe5c61dbeeaf5cccae01 Mon Sep 17 00:00:00 2001 From: Enrico Battocchi Date: Thu, 28 May 2026 09:59:16 +0200 Subject: [PATCH 1/4] Collapse multiple unprocessed RCs to the latest one per sync run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run 26561237947 (2026-05-28 schedule) processed both 27.8-RC1 and 27.8-RC2 because the resolver fanned out one matrix entry per unprocessed RC. The agent then independently re-documented the same wpseo_custom_fields_pre_query filter, producing PR #400 (for RC1) and PR #401 (for RC2) — identical titles, identical work, one of them obvious waste. Process only the latest unprocessed RC per product per run. The earlier unprocessed RCs are silently superseded: - The resolver collects them in a new superseded_actions list (paired with the RC that supersedes them). - A new "Mark superseded RCs" step posts a brief marker comment on the tracking issue for each, mirroring the dedup style of the safety-net step (skips if a marker already exists). - The latest RC's diff base is computed as before: if a prior RC of the same base is in processed_markers, diff against that (incremental); otherwise against the latest stable (full release cycle). When multiple never-seen RCs of the same base collapse to the latest, that's a full stable-cycle diff which captures any net public-surface changes (an RC2 that reverts an RC1 change shows neither; an RC2 that adds on top of RC1 shows both). - workflow_dispatch with an explicit input_rc_tag bypasses the collapse, preserving the manual-backfill path. Synthetic resolver tests (5 scenarios) cover: the bug case, the already-processed-prior-RC case, single-RC behavior unchanged, empty queue, and the input_rc_tag bypass — all pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/rc-docs-sync.yml | 50 ++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rc-docs-sync.yml b/.github/workflows/rc-docs-sync.yml index bd7eefc8..c6dd9627 100644 --- a/.github/workflows/rc-docs-sync.yml +++ b/.github/workflows/rc-docs-sync.yml @@ -130,7 +130,7 @@ jobs: input_product = os.environ.get("INPUT_PRODUCT") or "" input_rc_tag = os.environ.get("INPUT_RC_TAG") or "" - queue, seed_actions = [], [] + queue, seed_actions, superseded_actions = [], [], [] products_to_sweep = [input_product] if input_product else list(PRODUCTS.keys()) for slug in products_to_sweep: @@ -165,6 +165,26 @@ jobs: continue last_key = sort_key(processed_markers[-1]) rcs_to_process = sorted([t for t in rc_tags if sort_key(t) > last_key], key=sort_key) + # Collapse multiple unprocessed RCs to just the latest. Earlier + # RCs in the same sync run would otherwise fan out as separate + # matrix entries and produce duplicate PRs (see GH run + # 26561237947, which opened PRs #400 and #401 for the same + # filter introduced in 27.8-RC1 and re-emitted in 27.8-RC2). + # The latest RC's diff base is computed below as usual; the + # skipped RCs get a "superseded" marker so the state machine + # advances past them. Explicit per-RC backfill via + # workflow_dispatch with input_rc_tag bypasses this collapse. 
+ if len(rcs_to_process) > 1: + latest = rcs_to_process[-1] + for skipped in rcs_to_process[:-1]: + superseded_actions.append({ + "issue": tracking_issue, + "product": slug, + "rc_tag": skipped, + "superseded_by": latest, + "display_name": product["display_name"], + }) + rcs_to_process = [latest] for rc_tag in rcs_to_process: # Prefer the most recent already-processed RC of the same base version as @@ -200,13 +220,14 @@ jobs: "tracking_issue": tracking_issue, }) - print(json.dumps({"queue": queue, "seeds": seed_actions})) + print(json.dumps({"queue": queue, "seeds": seed_actions, "superseded": superseded_actions})) PY cat queue.json # Emit queue as compact JSON for matrix consumption. echo "queue_json=$(jq -c '.queue' queue.json)" >> "$GITHUB_OUTPUT" echo "count=$(jq '.queue | length' queue.json)" >> "$GITHUB_OUTPUT" echo "seed_count=$(jq '.seeds | length' queue.json)" >> "$GITHUB_OUTPUT" + echo "superseded_count=$(jq '.superseded | length' queue.json)" >> "$GITHUB_OUTPUT" - name: Seed first-run tracking issues if: steps.queue.outputs.seed_count != '0' @@ -225,6 +246,31 @@ jobs: **First-run seed for ${display}** — RC tag \`${rc_tag}\` recorded as the baseline. No historical RCs will be processed automatically. To backfill a specific RC, use \`workflow_dispatch\` with \`product=${product}\` and the desired \`rc_tag\`." done + - name: Mark superseded RCs (latest-only-per-run) + if: steps.queue.outputs.superseded_count != '0' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_REPO: ${{ github.repository }} + run: | + set -euo pipefail + jq -c '.superseded[]' queue.json | while read -r entry; do + issue=$(echo "$entry" | jq -r .issue) + product=$(echo "$entry" | jq -r .product) + rc_tag=$(echo "$entry" | jq -r .rc_tag) + superseded_by=$(echo "$entry" | jq -r .superseded_by) + display=$(echo "$entry" | jq -r .display_name) + base_version="${rc_tag%-RC*}" + # Idempotency: skip if a marker for this RC already exists. 
+ if gh issue view "$issue" --json comments --jq '.comments[].body' \ + | grep -Eq ""; then + echo "Marker for ${product} ${rc_tag} already exists; skipping superseded marker." + continue + fi + gh issue comment "$issue" --body " + + **${display} ${base_version}** (RC \`${rc_tag}\`) — superseded by \`${superseded_by}\` in the same sync run. The later RC was processed against the same diff base, so any net public-surface changes are covered there. This RC was not individually processed." + done + - name: Note when queue is empty if: steps.queue.outputs.count == '0' run: echo "No new RC tags to process this run." From eca2bceb40acd39af71a25cd2dddc0646ae93722 Mon Sep 17 00:00:00 2001 From: Enrico Battocchi Date: Thu, 28 May 2026 13:07:59 +0200 Subject: [PATCH 2/4] Collapse per base_version, not across all unprocessed RCs Copilot caught this on #402: if the workflow misses RCs from two different cycles (e.g. 27.8-RC1 and 27.9-RC1 both unprocessed), the previous logic collapsed both into 27.9-RC1 against the latest stable, which never sees 27.8-RC1's public surface. Group unprocessed RCs by base_version, keep only the latest per group. Each base gets its own queue entry diffed against the previous stable. Synthetic test (scenario 6) confirms the fix. --- .github/workflows/rc-docs-sync.yml | 44 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/.github/workflows/rc-docs-sync.yml b/.github/workflows/rc-docs-sync.yml index c6dd9627..ac4348b0 100644 --- a/.github/workflows/rc-docs-sync.yml +++ b/.github/workflows/rc-docs-sync.yml @@ -165,26 +165,34 @@ jobs: continue last_key = sort_key(processed_markers[-1]) rcs_to_process = sorted([t for t in rc_tags if sort_key(t) > last_key], key=sort_key) - # Collapse multiple unprocessed RCs to just the latest. 
Earlier - # RCs in the same sync run would otherwise fan out as separate - # matrix entries and produce duplicate PRs (see GH run - # 26561237947, which opened PRs #400 and #401 for the same - # filter introduced in 27.8-RC1 and re-emitted in 27.8-RC2). - # The latest RC's diff base is computed below as usual; the - # skipped RCs get a "superseded" marker so the state machine - # advances past them. Explicit per-RC backfill via + # Within each base version, collapse to just the latest unprocessed + # RC and mark earlier ones in that group as superseded — earlier RCs + # in the same base would otherwise fan out as separate matrix + # entries and produce duplicate PRs (see GH run 26561237947, which + # opened #400 and #401 for the same filter introduced in 27.8-RC1 + # and re-emitted in 27.8-RC2). Collapsing is *per base version* so + # if two cycles' RCs are simultaneously unprocessed (e.g. workflow + # missed 27.8-RC1 and 27.9-RC1), each base still gets its own + # queue entry diffed against its own previous-stable. Explicit # workflow_dispatch with input_rc_tag bypasses this collapse. 
if len(rcs_to_process) > 1: - latest = rcs_to_process[-1] - for skipped in rcs_to_process[:-1]: - superseded_actions.append({ - "issue": tracking_issue, - "product": slug, - "rc_tag": skipped, - "superseded_by": latest, - "display_name": product["display_name"], - }) - rcs_to_process = [latest] + by_base = {} + for rc in rcs_to_process: + by_base.setdefault(base_version(rc), []).append(rc) + collapsed = [] + for base, group in by_base.items(): + group_sorted = sorted(group, key=sort_key) + latest_in_group = group_sorted[-1] + for skipped in group_sorted[:-1]: + superseded_actions.append({ + "issue": tracking_issue, + "product": slug, + "rc_tag": skipped, + "superseded_by": latest_in_group, + "display_name": product["display_name"], + }) + collapsed.append(latest_in_group) + rcs_to_process = sorted(collapsed, key=sort_key) for rc_tag in rcs_to_process: # Prefer the most recent already-processed RC of the same base version as From f5b917b30e8fa22410f9d4499348c3a00e7059b9 Mon Sep 17 00:00:00 2001 From: Enrico Battocchi Date: Thu, 28 May 2026 13:24:26 +0200 Subject: [PATCH 3/4] Post superseded markers from the matrix entry, gated on its own marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot caught: posting superseded markers in the resolve job advances state past the skipped RCs before the matrix entry that's supposed to cover them has finished. If the latest RC's clone or bundle step fails (no marker for it gets posted), the next run treats the superseded RCs as `same_base_processed` and uses them as diff bases — missing changes between stable and the skipped RC. Move the marker-posting into the matrix job: each queue entry now carries its own `superseded_rcs` list, and a final per-entry step posts them only after verifying the entry's own marker is already on the tracking issue. A clone/bundle failure now leaves the superseded RCs unmarked, so the next run re-picks them up correctly. 
--- .github/workflows/rc-docs-sync.yml | 104 ++++++++++++++++------------- 1 file changed, 59 insertions(+), 45 deletions(-) diff --git a/.github/workflows/rc-docs-sync.yml b/.github/workflows/rc-docs-sync.yml index ac4348b0..7ca3e31c 100644 --- a/.github/workflows/rc-docs-sync.yml +++ b/.github/workflows/rc-docs-sync.yml @@ -130,7 +130,7 @@ jobs: input_product = os.environ.get("INPUT_PRODUCT") or "" input_rc_tag = os.environ.get("INPUT_RC_TAG") or "" - queue, seed_actions, superseded_actions = [], [], [] + queue, seed_actions = [], [] products_to_sweep = [input_product] if input_product else list(PRODUCTS.keys()) for slug in products_to_sweep: @@ -166,15 +166,19 @@ jobs: last_key = sort_key(processed_markers[-1]) rcs_to_process = sorted([t for t in rc_tags if sort_key(t) > last_key], key=sort_key) # Within each base version, collapse to just the latest unprocessed - # RC and mark earlier ones in that group as superseded — earlier RCs - # in the same base would otherwise fan out as separate matrix - # entries and produce duplicate PRs (see GH run 26561237947, which - # opened #400 and #401 for the same filter introduced in 27.8-RC1 - # and re-emitted in 27.8-RC2). Collapsing is *per base version* so - # if two cycles' RCs are simultaneously unprocessed (e.g. workflow - # missed 27.8-RC1 and 27.9-RC1), each base still gets its own - # queue entry diffed against its own previous-stable. Explicit - # workflow_dispatch with input_rc_tag bypasses this collapse. + # RC and attach the skipped ones as `superseded_rcs` on that queue + # entry. The matrix step that processes the latest RC will post + # the superseded markers itself, AFTER its own marker is in place + # — that way a clone/bundle failure doesn't leave the tracking + # issue with superseded markers that incorrectly advance state + # past unprocessed RCs. + # + # Collapsing is *per base version*: if two cycles' RCs are + # simultaneously unprocessed (e.g. 
workflow missed 27.8-RC1 and + # 27.9-RC1), each base still gets its own queue entry diffed + # against its own previous-stable. Explicit workflow_dispatch + # with input_rc_tag bypasses this collapse. + superseded_by_latest = {} if len(rcs_to_process) > 1: by_base = {} for rc in rcs_to_process: @@ -183,14 +187,7 @@ jobs: for base, group in by_base.items(): group_sorted = sorted(group, key=sort_key) latest_in_group = group_sorted[-1] - for skipped in group_sorted[:-1]: - superseded_actions.append({ - "issue": tracking_issue, - "product": slug, - "rc_tag": skipped, - "superseded_by": latest_in_group, - "display_name": product["display_name"], - }) + superseded_by_latest[latest_in_group] = group_sorted[:-1] collapsed.append(latest_in_group) rcs_to_process = sorted(collapsed, key=sort_key) @@ -226,16 +223,16 @@ jobs: "prev_release": prev, "prev_kind": prev_kind, "tracking_issue": tracking_issue, + "superseded_rcs": superseded_by_latest.get(rc_tag, []), }) - print(json.dumps({"queue": queue, "seeds": seed_actions, "superseded": superseded_actions})) + print(json.dumps({"queue": queue, "seeds": seed_actions})) PY cat queue.json # Emit queue as compact JSON for matrix consumption. echo "queue_json=$(jq -c '.queue' queue.json)" >> "$GITHUB_OUTPUT" echo "count=$(jq '.queue | length' queue.json)" >> "$GITHUB_OUTPUT" echo "seed_count=$(jq '.seeds | length' queue.json)" >> "$GITHUB_OUTPUT" - echo "superseded_count=$(jq '.superseded | length' queue.json)" >> "$GITHUB_OUTPUT" - name: Seed first-run tracking issues if: steps.queue.outputs.seed_count != '0' @@ -254,31 +251,6 @@ jobs: **First-run seed for ${display}** — RC tag \`${rc_tag}\` recorded as the baseline. No historical RCs will be processed automatically. To backfill a specific RC, use \`workflow_dispatch\` with \`product=${product}\` and the desired \`rc_tag\`." 
done - - name: Mark superseded RCs (latest-only-per-run) - if: steps.queue.outputs.superseded_count != '0' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GH_REPO: ${{ github.repository }} - run: | - set -euo pipefail - jq -c '.superseded[]' queue.json | while read -r entry; do - issue=$(echo "$entry" | jq -r .issue) - product=$(echo "$entry" | jq -r .product) - rc_tag=$(echo "$entry" | jq -r .rc_tag) - superseded_by=$(echo "$entry" | jq -r .superseded_by) - display=$(echo "$entry" | jq -r .display_name) - base_version="${rc_tag%-RC*}" - # Idempotency: skip if a marker for this RC already exists. - if gh issue view "$issue" --json comments --jq '.comments[].body' \ - | grep -Eq ""; then - echo "Marker for ${product} ${rc_tag} already exists; skipping superseded marker." - continue - fi - gh issue comment "$issue" --body " - - **${display} ${base_version}** (RC \`${rc_tag}\`) — superseded by \`${superseded_by}\` in the same sync run. The later RC was processed against the same diff base, so any net public-surface changes are covered there. This RC was not individually processed." - done - - name: Note when queue is empty if: steps.queue.outputs.count == '0' run: echo "No new RC tags to process this run." @@ -687,3 +659,45 @@ jobs: Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} Inspect the run logs and any PRs labeled \`rc/${rc_tag}\` to see what the agent produced before failing." + + # Posts "superseded by X" markers for the RCs this matrix entry collapsed + # past. Gated on the latest RC's own marker actually being present — if + # clone or bundle failed before any marker for this RC was posted, the + # skipped RCs stay unmarked so the next run re-picks them up correctly + # (rather than being incorrectly used as `same_base_processed` diff bases). 
+ - name: Mark RCs superseded by this entry + if: always() && steps.bundle.outputs.any_content != '' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_REPO: ${{ github.repository }} + LATEST_RC_TAG: ${{ matrix.item.rc_tag }} + DISPLAY_NAME: ${{ matrix.item.display_name }} + PRODUCT: ${{ matrix.item.product }} + TRACKING_ISSUE: ${{ matrix.item.tracking_issue }} + SUPERSEDED_RCS: ${{ toJSON(matrix.item.superseded_rcs) }} + run: | + set -euo pipefail + # No-op when this entry didn't collapse anything. + if [ "$(echo "$SUPERSEDED_RCS" | jq 'length')" = "0" ]; then + exit 0 + fi + # Refuse to post superseded markers unless the marker for this entry's + # latest RC is already on the tracking issue (placed by the no-op step, + # fast-path step, agent itself, or the safety-net step above). + if ! gh issue view "$TRACKING_ISSUE" --json comments --jq '.comments[].body' \ + | grep -Eq ""; then + echo "Latest RC ${LATEST_RC_TAG} has no marker yet; not posting superseded markers." + exit 0 + fi + base_version="${LATEST_RC_TAG%-RC*}" + echo "$SUPERSEDED_RCS" | jq -r '.[]' | while read -r rc_tag; do + [ -z "$rc_tag" ] && continue + if gh issue view "$TRACKING_ISSUE" --json comments --jq '.comments[].body' \ + | grep -Eq ""; then + echo "Marker for ${rc_tag} already exists; skipping." + continue + fi + gh issue comment "$TRACKING_ISSUE" --body " + + **${DISPLAY_NAME} ${base_version}** (RC \`${rc_tag}\`) — superseded by \`${LATEST_RC_TAG}\` in the same sync run. The later RC was processed against the same diff base, so any net public-surface changes are covered there." + done From 55bad1cd379f95be5baa82f10355336f817bb8a4 Mon Sep 17 00:00:00 2001 From: Enrico Battocchi Date: Thu, 28 May 2026 13:44:52 +0200 Subject: [PATCH 4/4] Fix two latent resolver flaws Copilot caught MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - last_key was reading processed_markers[-1] (document order). 
Superseded markers are posted *after* the superseding RC's marker, so document order would identify an older (superseded) RC as the high-water mark and re-queue an already-processed RC. Use max by sort_key instead. - superseded_by_latest was initialized only inside the non-dispatch branch, so workflow_dispatch with explicit input_rc_tag would raise a NameError when building the queue entry. Initialize at the top of each per-product iteration (also prevents accidental cross-product leakage once PRODUCTS has more than one entry). Synthetic test suite extended to 9 scenarios, including the document-order marker case and the dispatch-bypass case — both now pass. --- .github/workflows/rc-docs-sync.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rc-docs-sync.yml b/.github/workflows/rc-docs-sync.yml index 7ca3e31c..250a1f21 100644 --- a/.github/workflows/rc-docs-sync.yml +++ b/.github/workflows/rc-docs-sync.yml @@ -148,6 +148,9 @@ jobs: stable_tags = [t for t in all_tags if STABLE_RE.match(t)] processed_markers = fetch_processed_markers(tracking_issue, slug) + # Per-product state — must be initialized for every iteration of the + # outer loop, and used by both the dispatch and the scheduled paths. + superseded_by_latest = {} if input_rc_tag and input_product == slug: if input_rc_tag not in rc_tags: @@ -163,7 +166,14 @@ jobs: "rc_tag": seed_rc, "display_name": product["display_name"], }) continue - last_key = sort_key(processed_markers[-1]) + # Use the max processed marker by sort_key, not the last in + # document order. Superseded markers are posted *after* their + # superseding RC's marker, so document order would mis-identify + # an older (superseded) RC as the high-water mark and re-queue + # an already-processed RC. Manual backfills via workflow_dispatch + # (which can post markers for arbitrarily-old RCs) have the same + # property. 
+ last_key = sort_key(max(processed_markers, key=sort_key)) rcs_to_process = sorted([t for t in rc_tags if sort_key(t) > last_key], key=sort_key) # Within each base version, collapse to just the latest unprocessed # RC and attach the skipped ones as `superseded_rcs` on that queue @@ -178,7 +188,6 @@ jobs: # 27.9-RC1), each base still gets its own queue entry diffed # against its own previous-stable. Explicit workflow_dispatch # with input_rc_tag bypasses this collapse. - superseded_by_latest = {} if len(rcs_to_process) > 1: by_base = {} for rc in rcs_to_process: