From b71eb2a5d3599538555a00cc89961024693638c5 Mon Sep 17 00:00:00 2001
From: Gerard
Date: Thu, 2 Jul 2026 06:58:19 -0400
Subject: [PATCH] feat(ci): self-contained integration summary comment (real vs infra)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PR summary comment previously showed only per-tier 'N tests, M
failures' — to see WHICH testcase failed, WHY, or whether it was a real
code issue vs an SSM/ENI infra flake, you had to download the JUnit
artifact or dig through collapsed logs.

generate_markdown_summary now emits a self-contained
test-results/summary.md (posted via the existing post_pr_comment) with:
- a verdict separating REAL failures from INFRA flakes,
- a per-tier per-testcase table (result + time + reason),
- a Real-failures section with detail excerpts,
- an Infra-flakes section (SSM/ENI setup, labeled task #10).

Classification uses the JUnit failure type: real tier failures are
type=AssertionError (junit_add_failure); synthetic setup failures are
type=ExecutionError (generate_failure_xml), with a keyword fallback.
Report-only — exit-code behavior unchanged. Scripts-only, no workflow
change. Verified locally against sample pass/real-fail/infra-fail XMLs.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/run-integration-tests.sh | 132 +++++++++++++++++++++++++++----
 1 file changed, 115 insertions(+), 17 deletions(-)

diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh
index d8c8dc7..3cade20 100755
--- a/scripts/run-integration-tests.sh
+++ b/scripts/run-integration-tests.sh
@@ -1215,6 +1215,115 @@ PYEOF
     log_info "JSON summary generated: $RESULTS_DIR/summary.json"
 }
 
+# Generate a self-contained, analyzable Markdown summary of all tiers: per-tier,
+# per-testcase pass/fail WITH the failure reason, classifying each failure as a
+# real test failure (JUnit type=AssertionError, from the tier scripts) vs an
+# infra/setup flake (type=ExecutionError, from generate_failure_xml — ENI bind /
+# SSM / listener-start timeouts). Written to summary.md so a reviewer can analyze
+# a run from the PR comment alone, without downloading artifacts or using gh.
+generate_markdown_summary() {
+    local commit_hash run_url label
+    commit_hash=$(git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo "unknown")
+    run_url="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY:-gspivey/dpdk-stdlib-rust}/actions/runs/${GITHUB_RUN_ID:-0}"
+    label="${GITHUB_WORKFLOW:-Integration Tests}"
+    python3 - "$RESULTS_DIR" "$commit_hash" "$run_url" "$label" > "$RESULTS_DIR/summary.md" <<'PYEOF'
+import re, os, sys, html
+
+results_dir, commit, run_url, label = sys.argv[1:5]
+# Infra/setup failures use JUnit type=ExecutionError (generate_failure_xml); real
+# test failures use type=AssertionError (harness junit_add_failure). Fall back to
+# a keyword match if the type is missing.
+INFRA_RE = re.compile(r'ENI|SSM|Polling timeout|configure-eni|assign-ip|Failed to start|unbind|IP assignment|listener|readiness|instance', re.I)
+
+def classify(ftype, msg):
+    if (ftype or '') == 'ExecutionError':
+        return 'infra'
+    if (ftype or '') == 'AssertionError':
+        return 'real'
+    return 'infra' if INFRA_RE.search(msg or '') else 'real'
+
+def cell(s):  # one-line, pipe-escaped table cell; input is already HTML-unescaped
+    s = (s or '').replace('\n', ' ').replace('|', r'\|').strip()
+    return (s[:90] + '…') if len(s) > 91 else s
+
+def attr(attrs, key, default=''):  # raw value of key="..."; \b keeps name= off classname=/hostname=
+    m = re.search(r'\b' + key + r'="([^"]*)"', attrs or '')
+    return m.group(1) if m else default
+
+rows, reals, infras = [], [], []
+total = passed = n_real = n_infra = 0
+
+for xmlf in sorted(os.listdir(results_dir)):
+    if not xmlf.endswith('.xml'):
+        continue
+    with open(os.path.join(results_dir, xmlf), encoding='utf-8', errors='replace') as f:
+        content = f.read()
+    sm = re.search(r'<testsuite\b((?:"[^"]*"|[^>"])*)>', content)  # \b: do not match <testsuites>
+    if not sm:
+        continue
+    suite = attr(sm.group(1), 'name', xmlf)
+    for tc in re.finditer(r'<testcase\b((?:"[^"]*"|[^>"])*?)(?:/>|>(.*?)</testcase>)', content, re.DOTALL):
+        name = attr(tc.group(1), 'name', 'unknown')
+        tsec = attr(tc.group(1), 'time', '0')
+        fm = re.search(r'<failure\b((?:"[^"]*"|[^>"])*?)(?:/>|>(.*?)</failure>)', tc.group(2) or '', re.DOTALL)
+        total += 1
+        if not fm:
+            passed += 1
+            rows.append((suite, name, '✅ pass', tsec, ''))
+            continue
+        msg = html.unescape(attr(fm.group(1), 'message'))  # unescape exactly once, here
+        kind = classify(attr(fm.group(1), 'type'), msg)
+        details = html.unescape(fm.group(2) or '').strip()
+        if kind == 'infra':
+            n_infra += 1
+            rows.append((suite, name, '⚠️ infra', tsec, cell(msg)))
+            infras.append((suite, name, msg))
+        else:
+            n_real += 1
+            rows.append((suite, name, '❌ real', tsec, cell(msg)))
+            reals.append((suite, name, msg, details))
+
+def plural(n):
+    return '' if n == 1 else 's'
+
+if n_real:
+    verdict = f'❌ {n_real} real failure{plural(n_real)}'
+    if n_infra:
+        verdict += f' · ⚠️ {n_infra} infra flake{plural(n_infra)}'
+elif n_infra:
+    verdict = f'⚠️ {n_infra} infra flake{plural(n_infra)} · 0 real failures'
+else:
+    verdict = '✅ all passed'
+
+o = []
+o.append(f'## Integration Tests — {verdict}')
+o.append(f'`{commit}` · [run]({run_url}) · {label} · {total} tests: {passed} ✅ · {n_real} ❌ real · {n_infra} ⚠️ infra')
+o.append('')
+o.append('| Tier | Test | Result | Time | Reason |')
+o.append('|------|------|--------|-----:|--------|')
+for suite, name, result, tsec, reason in rows:
+    o.append(f'| {suite} | {name} | {result} | {tsec}s | {reason} |')
+o.append('')
+if reals:
+    o.append(f'### ❌ Real failures ({n_real}) — code/test issues')
+    for suite, name, msg, details in reals:
+        o.append(f'- **{suite} / {name}** — {msg}')
+        if details:
+            o.append(f'<details><summary>details</summary>\n\n```\n{details[:1500]}\n```\n</details>')
+    o.append('')
+if infras:
+    o.append(f'### ⚠️ Infra flakes ({n_infra}) — SSM/ENI setup, not code (task #10)')
+    for suite, name, msg in infras:
+        o.append(f'- {suite} / {name} — {msg}')
+    o.append('')
+if not reals and not infras:
+    o.append(f'### ✅ All {total} tests passed')
+
+print('\n'.join(o))
+PYEOF
+    log_info "Markdown summary generated: $RESULTS_DIR/summary.md"
+}
+
 # ── Teardown ─────────────────────────────────────────────────────────────────
 
 teardown_infrastructure() {
@@ -1619,24 +1728,13 @@ Infrastructure ready.
     collect_instance_logs || true
     write_step_summary || true
 
-    # Post final summary to PR
-    local summary_body="## [CI] Stage: Summary\n"
-    if [[ "$TEST_EXIT_CODE" -eq 0 ]]; then
-        summary_body+="All tests **PASSED**."
-    else
-        summary_body+="Some tests **FAILED** (exit code: $TEST_EXIT_CODE)."
+    # Post the final analyzable summary to the PR: per-testcase pass/fail with the
+    # failure reason, real failures vs infra flakes. Best-effort like the steps
+    # above: a reporting error must not skip teardown; an empty file is not posted.
+    generate_markdown_summary || true
+    if [[ -s "$RESULTS_DIR/summary.md" ]]; then
+        post_pr_comment "$(cat "$RESULTS_DIR/summary.md")"
     fi
-    summary_body+="\n\nARP seeding: kernel /proc/net/arp (automatic)"
-    # Include JUnit results summary
-    for xml_file in "$RESULTS_DIR"/*.xml; do
-        [[ -f "$xml_file" ]] || continue
-        local suite_name tests failures
-        suite_name=$(sed -n 's/.*name="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        tests=$(sed -n 's/.*tests="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        failures=$(sed -n 's/.*failures="\([^"]*\)".*/\1/p' "$xml_file" | head -1)
-        summary_body+="\n- **${suite_name:-unknown}**: ${tests:-0} tests, ${failures:-0} failures"
-    done
-    post_pr_comment "$(echo -e "$summary_body")"
 
     # Step 9: Teardown
     teardown_infrastructure