From b71eb2a5d3599538555a00cc89961024693638c5 Mon Sep 17 00:00:00 2001
From: Gerard <gs@gspivey.com>
Date: Thu, 2 Jul 2026 06:58:19 -0400
Subject: [PATCH] feat(ci): self-contained integration summary comment (real vs
 infra)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PR summary comment previously showed only per-tier 'N tests, M failures' —
to see WHICH testcase failed, WHY, or whether it was a real code issue vs an
SSM/ENI infra flake, you had to download the JUnit artifact or dig through
collapsed log <details>.

generate_markdown_summary now emits a self-contained test-results/summary.md
(posted via the existing post_pr_comment) with:
- a verdict separating REAL failures from INFRA flakes,
- a per-tier per-testcase table (result + time + reason),
- a Real-failures section with detail excerpts,
- an Infra-flakes section (SSM/ENI setup, labeled task #10).

Classification uses the JUnit failure type: real tier failures are
type=AssertionError (junit_add_failure); synthetic setup failures are
type=ExecutionError (generate_failure_xml). A missing or unrecognised type
falls back to a keyword match on the failure message. Report-only —
exit-code behavior unchanged.

Scripts-only, no workflow change. Verified locally against sample
pass/real-fail/infra-fail XMLs.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 scripts/run-integration-tests.sh | 132 +++++++++++++++++++++++++++----
 1 file changed, 115 insertions(+), 17 deletions(-)
diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh
index d8c8dc7..3cade20 100755
--- a/scripts/run-integration-tests.sh
+++ b/scripts/run-integration-tests.sh
@@ -1215,6 +1215,115 @@ PYEOF
     log_info "JSON summary generated: $RESULTS_DIR/summary.json"
 }
 
+# Generate a self-contained Markdown summary of every JUnit file in $RESULTS_DIR:
+# a per-tier, per-testcase table (result, time, reason) with each failure classed
+# as a real test failure (JUnit type=AssertionError, from the tier scripts) or an
+# infra/setup flake (type=ExecutionError, from generate_failure_xml — ENI bind /
+# SSM / listener-start timeouts). Reads RESULTS_DIR, REPO_ROOT and the GITHUB_*
+# run variables; writes $RESULTS_DIR/summary.md (UTF-8) for the PR comment.
+generate_markdown_summary() {
+    local commit_hash run_url label
+    commit_hash=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo "unknown")
+    run_url="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-gspivey/dpdk-stdlib-rust}/actions/runs/${GITHUB_RUN_ID:-0}"
+    label="${GITHUB_WORKFLOW:-Integration Tests}"
+    PYTHONIOENCODING=utf-8 python3 - "$RESULTS_DIR" "$commit_hash" "$run_url" "$label" > "$RESULTS_DIR/summary.md" <<'PYEOF'
+import re, os, sys, html
+
+results_dir, commit, run_url, label = sys.argv[1:5]
+# Infra/setup failures use JUnit type=ExecutionError (generate_failure_xml); real
+# test failures use type=AssertionError (harness junit_add_failure). Fall back to
+# a keyword match if the type is missing.
+INFRA_RE = re.compile(r'ENI|SSM|Polling timeout|configure-eni|assign-ip|Failed to start|unbind|IP assignment|listener|readiness|instance', re.I)
+# Quote-aware tag patterns: any attribute order, '>' inside a quoted value, and self-closing tags all parse; <error> counts as a failure.
+TESTCASE_RE = re.compile(r'<testcase\b((?:"[^"]*"|[^>"])*?)(?:/>|>(.*?)</testcase>)', re.DOTALL)
+FAILURE_RE = re.compile(r'<(failure|error)\b((?:"[^"]*"|[^>"])*?)(?:/>|>(.*?)</\1>)', re.DOTALL)
+ATTR_RE = re.compile(r'([\w:.-]+)\s*=\s*"([^"]*)"')
+
+def classify(ftype, msg):
+    if (ftype or '') == 'ExecutionError':
+        return 'infra'
+    if (ftype or '') == 'AssertionError':
+        return 'real'
+    return 'infra' if INFRA_RE.search(msg or '') else 'real'
+
+def cell(s):
+    s = (s or '').replace('\n', ' ').replace('|', r'\|').strip()  # s is already entity-decoded; do not decode twice
+    return (s[:90] + '…') if len(s) > 91 else s
+
+rows, reals, infras = [], [], []
+total = passed = n_real = n_infra = 0
+
+for xmlf in sorted(os.listdir(results_dir)):
+    if not xmlf.endswith('.xml'):
+        continue
+    with open(os.path.join(results_dir, xmlf), encoding='utf-8', errors='replace') as f:
+        content = f.read()
+    sm = re.search(r'<testsuite\s+([^>]+)>', content)
+    if not sm:
+        continue
+    suite = dict(ATTR_RE.findall(sm.group(1))).get('name', xmlf)
+    for tc in TESTCASE_RE.finditer(content):
+        ta = dict(ATTR_RE.findall(tc.group(1)))
+        name, tsec = ta.get('name', 'unknown'), ta.get('time', '0')
+        fm = FAILURE_RE.search(tc.group(2) or '')
+        total += 1
+        if not fm:
+            passed += 1
+            rows.append((suite, name, '✅ pass', tsec, ''))
+            continue
+        fa = dict(ATTR_RE.findall(fm.group(2)))
+        msg, details = html.unescape(fa.get('message', '')), html.unescape(fm.group(3) or '').strip()
+        kind = classify(fa.get('type'), msg)
+        if kind == 'infra':
+            n_infra += 1
+            rows.append((suite, name, '⚠️ infra', tsec, cell(msg)))
+            infras.append((suite, name, msg))
+        else:
+            n_real += 1
+            rows.append((suite, name, '❌ real', tsec, cell(msg)))
+            reals.append((suite, name, msg, details))
+
+def plural(n):
+    return '' if n == 1 else 's'
+
+if n_real:
+    verdict = f'❌ {n_real} real failure{plural(n_real)}'
+    if n_infra:
+        verdict += f' · ⚠️ {n_infra} infra flake{plural(n_infra)}'
+elif n_infra:
+    verdict = f'⚠️ {n_infra} infra flake{plural(n_infra)} · 0 real failures'
+else:
+    verdict = '✅ all passed'
+
+o = []
+o.append(f'## Integration Tests — {verdict}')
+o.append(f'`{commit}` · [run]({run_url}) · {label} · {total} tests: {passed} ✅ · {n_real} ❌ real · {n_infra} ⚠️ infra')
+o.append('')
+o.append('| Tier | Test | Result | Time | Reason |')
+o.append('|------|------|--------|-----:|--------|')
+for suite, name, result, tsec, reason in rows:
+    o.append(f'| {suite} | {name} | {result} | {tsec}s | {reason} |')
+o.append('')
+if reals:
+    o.append(f'### ❌ Real failures ({n_real}) — code/test issues')
+    for suite, name, msg, details in reals:
+        o.append(f'- **{suite} / {name}** — {msg}')
+        if details:
+            o.append(f'  <details><summary>details</summary>\n\n```\n{details[:1500]}\n```\n</details>')
+    o.append('')
+if infras:
+    o.append(f'### ⚠️ Infra flakes ({n_infra}) — SSM/ENI setup, not code (task #10)')
+    for suite, name, msg in infras:
+        o.append(f'- {suite} / {name} — {msg}')
+    o.append('')
+if not reals and not infras:
+    o.append(f'### ✅ All {total} tests passed')
+
+print('\n'.join(o))
+PYEOF
+    log_info "Markdown summary generated: $RESULTS_DIR/summary.md"
+}
+
 # ── Teardown ─────────────────────────────────────────────────────────────────
 
 teardown_infrastructure() {
@@ -1619,24 +1728,13 @@ Infrastructure ready.
     collect_instance_logs || true
     write_step_summary || true
 
-    # Post final summary to PR
-    local summary_body="## [CI] Stage: Summary\n"
-    if [[ "$TEST_EXIT_CODE" -eq 0 ]]; then
-        summary_body+="All tests **PASSED**."
-    else
-        summary_body+="Some tests **FAILED** (exit code: $TEST_EXIT_CODE)."
+    # Post the final analyzable summary to the PR: per-tier, per-testcase pass/
+    # fail with reasons, real failures vs infra flakes. Best-effort like the steps
+    # above: a generator error does not abort before teardown; empty output is not posted.
+    generate_markdown_summary || true
+    if [[ -s "$RESULTS_DIR/summary.md" ]]; then
+        post_pr_comment "$(cat "$RESULTS_DIR/summary.md")"
     fi
-    summary_body+="\n\nARP seeding: kernel /proc/net/arp (automatic)"
-    # Include JUnit results summary
-    for xml_file in "$RESULTS_DIR"/*.xml; do
-        [[ -f "$xml_file" ]] || continue
-        local suite_name tests failures
-        suite_name=$(sed -n 's/.*name="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        tests=$(sed -n 's/.*tests="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        failures=$(sed -n 's/.*failures="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        summary_body+="\n- **${suite_name:-unknown}**: ${tests:-0} tests, ${failures:-0} failures"
-    done
-    post_pr_comment "$(echo -e "$summary_body")"
 
     # Step 9: Teardown
     teardown_infrastructure