From 3fc25e82dc08f8dab5b2636a6f81702c0c2765c8 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 17:14:57 +0200 Subject: [PATCH 1/8] fix(ci): migrate benchmarks to benchmarking-platform trigger --- .gitlab-ci.yml | 1 + .gitlab/benchmarks/.gitlab-ci.yml | 107 +++++++--------------- .gitlab/benchmarks/download-s3-reports.sh | 28 ++++++ 3 files changed, 62 insertions(+), 74 deletions(-) create mode 100755 .gitlab/benchmarks/download-s3-reports.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 83ce0def5..b0c73be46 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -31,6 +31,7 @@ stages: - integration-test - reliability - benchmarks + - post-benchmarks - fuzz - notify diff --git a/.gitlab/benchmarks/.gitlab-ci.yml b/.gitlab/benchmarks/.gitlab-ci.yml index 960383af6..e322fb131 100644 --- a/.gitlab/benchmarks/.gitlab-ci.yml +++ b/.gitlab/benchmarks/.gitlab-ci.yml @@ -2,13 +2,11 @@ variables: PREPARE_IMAGE: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest DD_OCTO_STS_IMAGE: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1 -.benchmark_job: - extends: .deploy-sa +# Bridge job: triggers the BP pipeline and blocks until it completes. +# Bridge jobs cannot appear in other jobs' needs: — downstream jobs use +# stage ordering (post-benchmarks stage runs after benchmarks stage). +benchmarks-trigger: stage: benchmarks - timeout: 6h - variables: - ITERATIONS: "${BENCHMARK_ITERATIONS:-1}" - MODES: "${BENCHMARK_MODES:-cpu,wall,alloc,memleak}" needs: - job: get-versions artifacts: true @@ -24,64 +22,27 @@ variables: - if: '$CI_PIPELINE_SOURCE == "web"' when: manual allow_failure: true - # Run automatically and non-blocking on any other source (push/trigger/api/ - # etc.) — mirrors the integration-test rules. The before_script CANCELLED - # gate skips branches with no open PR. + # Run automatically and non-blocking on any other source (push/trigger/api/etc.) 
- when: on_success allow_failure: true - script: | - # setup the env - export ARTIFACTS_DIR="$(pwd)/reports" && (mkdir "${ARTIFACTS_DIR}" || :) - export CANDIDATE_VERSION=${CURRENT_VERSION} - export BASELINE_VERSION=${PREVIOUS_VERSION} - export PLATFORM_DIR=".benchmarks/platform" - - # check for missing candidate version - if [ -z "${CANDIDATE_VERSION}" ]; then echo "Missing candidate version. Skipping."; exit 0; fi - - # fetch the common platform scripts - git -c url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf="https://github.com/DataDog/" \ - clone --branch dd-trace-go https://github.com/DataDog/benchmarking-platform ${PLATFORM_DIR} - - # apply the specific step scripts - cp -r .gitlab/benchmarks/steps/* ${PLATFORM_DIR}/steps/ - chmod a+x ${PLATFORM_DIR}/steps/* - - # check for mode validity - ${PLATFORM_DIR}/steps/check_modes.sh - if [ "$(cat .job_status)" == "SKIP" ]; then exit 0; fi - - # run benchmarks - ${PLATFORM_DIR}/steps/capture-hardware-software-info.sh - ${PLATFORM_DIR}/steps/run-benchmarks.sh - ${PLATFORM_DIR}/steps/analyze-results.sh - ${PLATFORM_DIR}/steps/upload-results-to-s3.sh - parallel: - matrix: - - RUN_MODE: ["cpu", "wall", "alloc", "memleak", "cpu,wall", "memleak,alloc", "cpu,wall,alloc,memleak"] - artifacts: - when: always - name: "reports" - paths: - - reports/ - expire_in: 3 months - -benchmarks-candidate-amd64: - extends: .benchmark_job - tags: ["arch:amd64"] - image: $BENCHMARK_IMAGE_AMD64 - -benchmarks-candidate-aarch64: - extends: .benchmark_job - tags: ["arch:arm64"] - image: $BENCHMARK_IMAGE_ARM64 variables: - KUBERNETES_MEMORY_REQUEST: 200Gi - KUBERNETES_MEMORY_LIMIT: 200Gi + CANDIDATE_VERSION: "${CURRENT_VERSION}" + BASELINE_VERSION: "${PREVIOUS_VERSION}" + BENCHMARK_ITERATIONS: "${BENCHMARK_ITERATIONS:-5}" + BENCHMARK_MODES: "${BENCHMARK_MODES:-cpu,wall,alloc,memleak}" + DDPROF_COMMIT_SHA: "${CI_COMMIT_SHA}" + DDPROF_COMMIT_BRANCH: "${CI_COMMIT_BRANCH}" + UPSTREAM_PROJECT_NAME: "java-profiler" + 
UPSTREAM_BRANCH: "${CI_PIPELINE_ID}" + UPSTREAM_PIPELINE_ID: "${CI_PIPELINE_ID}" + trigger: + project: DataDog/apm-reliability/benchmarking-platform + branch: java-profiler + strategy: depend post-benchmarks-pr-comment: extends: .retry-config - stage: benchmarks + stage: post-benchmarks tags: ["arch:arm64"] image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1 id_tokens: @@ -90,10 +51,6 @@ post-benchmarks-pr-comment: needs: - job: prepare:start artifacts: true - - job: benchmarks-candidate-amd64 - artifacts: true - - job: benchmarks-candidate-aarch64 - artifacts: true rules: - if: '$JDK_VERSION != null || $DEBUG_LEVEL != null || $HASH != null || $DOWNSTREAM != null' when: never @@ -101,36 +58,38 @@ post-benchmarks-pr-comment: when: never - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' when: never - # Always run when the candidate jobs ran, regardless of source, so results - # are posted back to the PR. + # Always run after the benchmarks stage so results are posted back to the PR. - when: always - timeout: 5m + timeout: 10m script: + - mkdir -p reports + - .gitlab/benchmarks/download-s3-reports.sh reports - .gitlab/benchmarks/post-pr-comment.sh reports allow_failure: true publish-benchmark-gh-pages: - stage: benchmarks + stage: post-benchmarks tags: ["arch:arm64"] image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1 id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts - needs: - - job: benchmarks-candidate-amd64 - artifacts: true - - job: benchmarks-candidate-aarch64 - artifacts: true + needs: [] + # Serialize concurrent GH Pages pushes. publish-gh-pages.sh uses + # 'git push --force'; two concurrent pushes race and the slower one + # silently discards the faster one's history update. 
+ resource_group: gh-pages-publish rules: - if: '$CI_PIPELINE_SOURCE == "schedule"' when: never - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH == "main"' when: always - timeout: 10m + timeout: 15m script: - - ./.gitlab/benchmarks/publish-gh-pages.sh + - mkdir -p reports + - .gitlab/benchmarks/download-s3-reports.sh reports + - ./.gitlab/benchmarks/publish-gh-pages.sh reports allow_failure: true include: - local: .gitlab/common.yml - - local: .gitlab/benchmarks/images.yml diff --git a/.gitlab/benchmarks/download-s3-reports.sh b/.gitlab/benchmarks/download-s3-reports.sh new file mode 100755 index 000000000..51862cdcc --- /dev/null +++ b/.gitlab/benchmarks/download-s3-reports.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Downloads benchmark reports uploaded by the BP pipeline for this pipeline run. +# +# The BP upload key is: +# s3://relenv-benchmarking-data/java-profiler/${CI_PIPELINE_ID}/${BP_JOB_ID}/ +# We pass UPSTREAM_PIPELINE_ID=${CI_PIPELINE_ID} and UPSTREAM_BRANCH=${CI_PIPELINE_ID} +# to BP; each BP job uploads to its own CI_JOB_ID leaf under that prefix, so +# syncing the whole prefix captures all arch+mode results. 
+set -euo pipefail + +DEST="${1:-reports}" +S3_BUCKET="relenv-benchmarking-data" +S3_PREFIX="java-profiler/${CI_PIPELINE_ID}" + +mkdir -p "${DEST}" +aws s3 sync "s3://${S3_BUCKET}/${S3_PREFIX}/" "${DEST}/" \ + --exclude "*" \ + --include "comparison-baseline-vs-candidate_*.md" \ + --include "comparison-baseline-vs-candidate_*.html" \ + --include "*.json" + +echo "Downloaded from s3://${S3_BUCKET}/${S3_PREFIX}/ → ${DEST}/" +FILE_COUNT=$(find "${DEST}" -type f | wc -l) +echo "Files downloaded: ${FILE_COUNT}" +if [ "${FILE_COUNT}" -eq 0 ]; then + echo "ERROR: no benchmark reports found — BP pipeline may not have uploaded yet" >&2 + exit 1 +fi From 2be4d5ab978e273499f06eb70feee319b535773f Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 21:50:34 +0200 Subject: [PATCH 2/8] feat(ci): add reliability table to benchmark PR comment --- .gitlab/benchmarks/post-pr-comment.sh | 56 +++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/.gitlab/benchmarks/post-pr-comment.sh b/.gitlab/benchmarks/post-pr-comment.sh index de410dff9..f06c413ef 100755 --- a/.gitlab/benchmarks/post-pr-comment.sh +++ b/.gitlab/benchmarks/post-pr-comment.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash -# Post aggregated benchmark comparison results as a single PR comment. +# Post aggregated benchmark results as a single PR comment. # -# Expects all per-cell comparison-baseline-vs-candidate_*.md reports to be -# present under REPORTS_DIR (default: reports/). 
+# Handles two report types found under REPORTS_DIR (default: reports/): +# - comparison-baseline-vs-candidate_*.md (perf comparison benchmarks) +# - result_${BENCHMARK}_${JDK}_${LIBRARY}.json (reliability benchmarks) # # Required env: # DDPROF_COMMIT_BRANCH – branch name used to locate the open PR @@ -14,7 +15,7 @@ set -euo pipefail REPORTS_DIR="${1:-reports}" HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Aggregate all per-cell reports into a single comment body +# --- Perf comparison reports (markdown, one per benchmark cell) --- SECTIONS="" for md in "${REPORTS_DIR}"/comparison-baseline-vs-candidate_*.md; do [ -f "${md}" ] || continue @@ -28,6 +29,53 @@ $(cat "${md}") " done +# --- Reliability reports (JSON, one per benchmark×JDK×library) --- +RELIABILITY_TABLE=$(python3 - "${REPORTS_DIR}" <<'PYEOF' +import json, sys, glob, collections + +reports_dir = sys.argv[1] +results = collections.defaultdict(dict) + +for path in sorted(glob.glob(f"{reports_dir}/result_*.json")): + try: + with open(path) as f: + r = json.load(f) + key = (r["benchmark"], r["jdk"]) + results[key][r["library"]] = r + except Exception: + continue + +if not results: + sys.exit(0) + +lines = [ + "### Reliability Benchmarks", + "", + "| Benchmark | JDK | Latest | Dev |", + "|-----------|-----|--------|-----|", +] +for (bench, jdk) in sorted(results.keys()): + libs = results[(bench, jdk)] + + def fmt(lib): + if lib not in libs: + return "—" + r = libs[lib] + icon = "💥" if r.get("crashed") else "✅" + avg = r.get("avg_ms", 0) + cnt = r.get("run_count", 0) + return f"{icon} {avg} ms ({cnt} iters)" + + lines.append(f"| {bench} | {jdk} | {fmt('latest')} | {fmt('dev')} |") + +print("\n".join(lines)) +PYEOF +) || true + +[ -n "${RELIABILITY_TABLE}" ] && SECTIONS="${SECTIONS} +${RELIABILITY_TABLE} +" + if [ -z "${SECTIONS}" ]; then echo "No benchmark reports found under ${REPORTS_DIR} — skipping comment" exit 0 From ba307c6d9400808ce51ddc855f5afe45a2ed5317 Mon Sep 17 00:00:00 2001 From: Jaroslav 
Bachorik Date: Thu, 18 Jun 2026 22:04:54 +0200 Subject: [PATCH 3/8] feat(ci): add delta%, upload count, issues to reliability table --- .gitlab/benchmarks/post-pr-comment.sh | 49 +++++++++++++++++++++------ 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/.gitlab/benchmarks/post-pr-comment.sh b/.gitlab/benchmarks/post-pr-comment.sh index f06c413ef..40f46a3e2 100755 --- a/.gitlab/benchmarks/post-pr-comment.sh +++ b/.gitlab/benchmarks/post-pr-comment.sh @@ -48,25 +48,52 @@ for path in sorted(glob.glob(f"{reports_dir}/result_*.json")): if not results: sys.exit(0) +def fmt_avg(r): + icon = "💥" if r.get("crashed") else "✅" + avg = r.get("avg_ms", 0) + cnt = r.get("run_count", 0) + return f"{icon} {avg} ms ({cnt} iters)" + +def fmt_delta(latest, dev): + la, da = latest.get("avg_ms", 0), dev.get("avg_ms", 0) + if not la or not da: + return "—" + pct = (da - la) / la * 100 + # positive = dev is slower; sign comes from the ':+' format spec below + arrow = "🔴 " if pct > 2 else ("🟢 " if pct < -2 else "") + return f"{arrow}{pct:+.1f}%" + +def fmt_uploads(r): + return str(r.get("upload_count", 0)) + +def fmt_issues(r): + e = r.get("error_count", 0) + w = r.get("warn_count", 0) + if e == 0 and w == 0: + return "—" + parts = [] + if e: parts.append(f"E:{e}") + if w: parts.append(f"W:{w}") + return "⚠️ " + " ".join(parts) + lines = [ "### Reliability Benchmarks", "", - "| Benchmark | JDK | Latest | Dev |", - "|-----------|-----|--------|-----|", + "| Benchmark | JDK | Latest | Dev | Δ% (dev vs latest) | Uploads L/D | Issues L/D |", + "|-----------|-----|--------|-----|-------------------|-------------|------------|", ] for (bench, jdk) in sorted(results.keys()): libs = results[(bench, jdk)] + latest = libs.get("latest", {}) + dev = libs.get("dev", {}) - def fmt(lib): - if lib not in libs: - return "—" - r = libs[lib] - icon = "💥" if r.get("crashed") else "✅" - avg = r.get("avg_ms", 0) - cnt = r.get("run_count", 0) - return f"{icon} {avg} ms ({cnt} iters)" + col_latest = fmt_avg(latest)
if latest else "—" + col_dev = fmt_avg(dev) if dev else "—" + col_delta = fmt_delta(latest, dev) if (latest and dev) else "—" + col_uploads = f"{fmt_uploads(latest)} / {fmt_uploads(dev)}" + col_issues = f"{fmt_issues(latest)} / {fmt_issues(dev)}" - lines.append(f"| {bench} | {jdk} | {fmt('latest')} | {fmt('dev')} |") + lines.append(f"| {bench} | {jdk} | {col_latest} | {col_dev} | {col_delta} | {col_uploads} | {col_issues} |") print("\n".join(lines)) PYEOF From e527a2f8a29222c822aac1fa0312ae65e4d4c513 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 22:22:17 +0200 Subject: [PATCH 4/8] fix(ci): use boto3 for S3 download; add benchmarking-platform SA to post-benchmark jobs --- .gitlab/benchmarks/.gitlab-ci.yml | 4 ++ .gitlab/benchmarks/download-s3-reports.sh | 48 +++++++++++++++++------ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.gitlab/benchmarks/.gitlab-ci.yml b/.gitlab/benchmarks/.gitlab-ci.yml index e322fb131..18cd8c693 100644 --- a/.gitlab/benchmarks/.gitlab-ci.yml +++ b/.gitlab/benchmarks/.gitlab-ci.yml @@ -48,6 +48,8 @@ post-benchmarks-pr-comment: id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts + variables: + KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: benchmarking-platform needs: - job: prepare:start artifacts: true @@ -74,6 +76,8 @@ publish-benchmark-gh-pages: id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts + variables: + KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: benchmarking-platform needs: [] # Serialize concurrent GH Pages pushes. 
publish-gh-pages.sh uses # 'git push --force'; two concurrent pushes race and the slower one diff --git a/.gitlab/benchmarks/download-s3-reports.sh b/.gitlab/benchmarks/download-s3-reports.sh index 51862cdcc..bb7aaafa5 100755 --- a/.gitlab/benchmarks/download-s3-reports.sh +++ b/.gitlab/benchmarks/download-s3-reports.sh @@ -6,6 +6,9 @@ # We pass UPSTREAM_PIPELINE_ID=${CI_PIPELINE_ID} and UPSTREAM_BRANCH=${CI_PIPELINE_ID} # to BP; each BP job uploads to its own CI_JOB_ID leaf under that prefix, so # syncing the whole prefix captures all arch+mode results. +# +# Uses boto3 (Python) rather than the aws CLI — the post-benchmark jobs run in +# dd-octo-sts-ci-base which ships Python but not the aws CLI binary. set -euo pipefail DEST="${1:-reports}" @@ -13,16 +16,35 @@ S3_BUCKET="relenv-benchmarking-data" S3_PREFIX="java-profiler/${CI_PIPELINE_ID}" mkdir -p "${DEST}" -aws s3 sync "s3://${S3_BUCKET}/${S3_PREFIX}/" "${DEST}/" \ - --exclude "*" \ - --include "comparison-baseline-vs-candidate_*.md" \ - --include "comparison-baseline-vs-candidate_*.html" \ - --include "*.json" - -echo "Downloaded from s3://${S3_BUCKET}/${S3_PREFIX}/ → ${DEST}/" -FILE_COUNT=$(find "${DEST}" -type f | wc -l) -echo "Files downloaded: ${FILE_COUNT}" -if [ "${FILE_COUNT}" -eq 0 ]; then - echo "ERROR: no benchmark reports found — BP pipeline may not have uploaded yet" >&2 - exit 1 -fi + +# Ensure boto3 is available; install quietly if missing. 
+python3 -c "import boto3" 2>/dev/null || pip3 install --quiet boto3 + +python3 - "${DEST}" "${S3_BUCKET}" "${S3_PREFIX}" <<'PYEOF' +import boto3, os, sys + +dest, bucket, prefix = sys.argv[1], sys.argv[2], sys.argv[3] + +INCLUDE_PATTERNS = (".json", "-vs-candidate.md", "-vs-candidate.html") + +s3 = boto3.client("s3") +paginator = s3.get_paginator("list_objects_v2") + +downloaded = 0 +for page in paginator.paginate(Bucket=bucket, Prefix=prefix): + for obj in page.get("Contents", []): + key = obj["Key"] + name = os.path.basename(key) + if not any(name.endswith(p) for p in INCLUDE_PATTERNS): + continue + local_path = os.path.join(dest, name) + print(f" {key} → {name}") + s3.download_file(bucket, key, local_path) + downloaded += 1 + +print(f"Downloaded {downloaded} file(s) from s3://{bucket}/{prefix}/") +if downloaded == 0: + print("ERROR: no benchmark reports found — BP pipeline may not have uploaded yet", + file=sys.stderr) + sys.exit(1) +PYEOF From 200d347e13a968bd9f134c5dfc5b992552d217ff Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 22:29:43 +0200 Subject: [PATCH 5/8] fix(ci): fetch BP artifacts via GitLab API instead of aws s3 sync --- .gitlab/benchmarks/.gitlab-ci.yml | 4 - .gitlab/benchmarks/download-s3-reports.sh | 100 ++++++++++++++-------- 2 files changed, 63 insertions(+), 41 deletions(-) diff --git a/.gitlab/benchmarks/.gitlab-ci.yml b/.gitlab/benchmarks/.gitlab-ci.yml index 18cd8c693..e322fb131 100644 --- a/.gitlab/benchmarks/.gitlab-ci.yml +++ b/.gitlab/benchmarks/.gitlab-ci.yml @@ -48,8 +48,6 @@ post-benchmarks-pr-comment: id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts - variables: - KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: benchmarking-platform needs: - job: prepare:start artifacts: true @@ -76,8 +74,6 @@ publish-benchmark-gh-pages: id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts - variables: - KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: benchmarking-platform needs: [] # Serialize concurrent GH Pages pushes. 
publish-gh-pages.sh uses # 'git push --force'; two concurrent pushes race and the slower one diff --git a/.gitlab/benchmarks/download-s3-reports.sh b/.gitlab/benchmarks/download-s3-reports.sh index bb7aaafa5..cef92a9f3 100755 --- a/.gitlab/benchmarks/download-s3-reports.sh +++ b/.gitlab/benchmarks/download-s3-reports.sh @@ -1,50 +1,76 @@ #!/usr/bin/env bash -# Downloads benchmark reports uploaded by the BP pipeline for this pipeline run. +# Downloads result JSONs from the BP downstream pipeline via the GitLab CI API. # -# The BP upload key is: -# s3://relenv-benchmarking-data/java-profiler/${CI_PIPELINE_ID}/${BP_JOB_ID}/ -# We pass UPSTREAM_PIPELINE_ID=${CI_PIPELINE_ID} and UPSTREAM_BRANCH=${CI_PIPELINE_ID} -# to BP; each BP job uploads to its own CI_JOB_ID leaf under that prefix, so -# syncing the whole prefix captures all arch+mode results. -# -# Uses boto3 (Python) rather than the aws CLI — the post-benchmark jobs run in -# dd-octo-sts-ci-base which ships Python but not the aws CLI binary. +# Requires only curl and python3 (stdlib) — no aws CLI, pip, or boto3 needed. +# BP jobs already store artifacts in GitLab; this fetches them directly from +# the downstream pipeline triggered by benchmarks-trigger. set -euo pipefail DEST="${1:-reports}" -S3_BUCKET="relenv-benchmarking-data" -S3_PREFIX="java-profiler/${CI_PIPELINE_ID}" - mkdir -p "${DEST}" -# Ensure boto3 is available; install quietly if missing. -python3 -c "import boto3" 2>/dev/null || pip3 install --quiet boto3 +TMPDIR_LOCAL=$(mktemp -d) +trap 'rm -rf "${TMPDIR_LOCAL}"' EXIT + +# ── 1. 
find the benchmarks-trigger bridge to get downstream project + pipeline ── +BRIDGES_FILE="${TMPDIR_LOCAL}/bridges.json" +curl -sf \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/bridges" \ + > "${BRIDGES_FILE}" -python3 - "${DEST}" "${S3_BUCKET}" "${S3_PREFIX}" <<'PYEOF' -import boto3, os, sys +read -r BP_PROJECT_ID DOWNSTREAM_PIPELINE_ID < <(python3 - "${BRIDGES_FILE}" <<'PYEOF' +import json, sys +with open(sys.argv[1]) as f: + bridges = json.load(f) +for b in bridges: + if b.get("name") == "benchmarks-trigger": + dp = b.get("downstream_pipeline") or {} + if dp.get("id") and dp.get("project_id"): + print(dp["project_id"], dp["id"]) + sys.exit(0) +print("", "") +PYEOF +) -dest, bucket, prefix = sys.argv[1], sys.argv[2], sys.argv[3] +if [ -z "${DOWNSTREAM_PIPELINE_ID:-}" ]; then + echo "No downstream BP pipeline found for benchmarks-trigger — skipping download" + exit 0 +fi +echo "BP downstream pipeline: project=${BP_PROJECT_ID} pipeline=${DOWNSTREAM_PIPELINE_ID}" -INCLUDE_PATTERNS = (".json", "-vs-candidate.md", "-vs-candidate.html") +# ── 2. list jobs in the downstream pipeline ── +JOBS_FILE="${TMPDIR_LOCAL}/jobs.json" +curl -sf \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/pipelines/${DOWNSTREAM_PIPELINE_ID}/jobs?per_page=100" \ + > "${JOBS_FILE}" -s3 = boto3.client("s3") -paginator = s3.get_paginator("list_objects_v2") +JOB_IDS=$(python3 -c " +import json +with open('${JOBS_FILE}') as f: + print(' '.join(str(j['id']) for j in json.load(f))) +") -downloaded = 0 -for page in paginator.paginate(Bucket=bucket, Prefix=prefix): - for obj in page.get("Contents", []): - key = obj["Key"] - name = os.path.basename(key) - if not any(name.endswith(p) for p in INCLUDE_PATTERNS): - continue - local_path = os.path.join(dest, name) - print(f" {key} → {name}") - s3.download_file(bucket, key, local_path) - downloaded += 1 +# ── 3. 
download result_*.json from each job's artifact zip ── +DOWNLOADED=0 +for JOB_ID in ${JOB_IDS}; do + ART_ZIP="${TMPDIR_LOCAL}/art_${JOB_ID}.zip" + if curl -sf \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/jobs/${JOB_ID}/artifacts" \ + --output "${ART_ZIP}" 2>/dev/null; then + # -j: junk paths (strip the artifacts/ prefix), quiet, overwrite + if unzip -q -j "${ART_ZIP}" "artifacts/result_*.json" -d "${DEST}/" 2>/dev/null; then + DOWNLOADED=$((DOWNLOADED + 1)) + fi + fi +done -print(f"Downloaded {downloaded} file(s) from s3://{bucket}/{prefix}/") -if downloaded == 0: - print("ERROR: no benchmark reports found — BP pipeline may not have uploaded yet", - file=sys.stderr) - sys.exit(1) -PYEOF +RESULT_COUNT=$(find "${DEST}" -name "result_*.json" | wc -l) +echo "result_*.json files downloaded: ${RESULT_COUNT} (from ${DOWNLOADED} BP job(s))" + +if [ "${RESULT_COUNT}" -eq 0 ]; then + echo "WARNING: no result JSONs found — BP jobs may not have run yet" >&2 + exit 1 +fi From 5f658b0dfd0d6a8de4d52a89cb8c7e421cd1e079 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 22:38:34 +0200 Subject: [PATCH 6/8] fix(ci): diagnose API errors explicitly, no silent set -e kill --- .gitlab/benchmarks/download-s3-reports.sh | 55 ++++++++++++++++------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/.gitlab/benchmarks/download-s3-reports.sh b/.gitlab/benchmarks/download-s3-reports.sh index cef92a9f3..762062e7c 100755 --- a/.gitlab/benchmarks/download-s3-reports.sh +++ b/.gitlab/benchmarks/download-s3-reports.sh @@ -4,7 +4,7 @@ # Requires only curl and python3 (stdlib) — no aws CLI, pip, or boto3 needed. # BP jobs already store artifacts in GitLab; this fetches them directly from # the downstream pipeline triggered by benchmarks-trigger. 
-set -euo pipefail +set -uo pipefail # intentionally no -e: we handle errors explicitly DEST="${1:-reports}" mkdir -p "${DEST}" @@ -12,12 +12,30 @@ mkdir -p "${DEST}" TMPDIR_LOCAL=$(mktemp -d) trap 'rm -rf "${TMPDIR_LOCAL}"' EXIT -# ── 1. find the benchmarks-trigger bridge to get downstream project + pipeline ── +# ── helper: curl with explicit HTTP status checking ────────────────────────── +# Usage: api_get URL OUT_FILE (response body is written to OUT_FILE) +# Returns 0 on 2xx, prints diagnostics and returns 1 otherwise. +api_get() { + local url="$1" out="$2" + local http_code + http_code=$(curl -s -o "${out}" -w "%{http_code}" \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" "${url}") + if [[ "${http_code}" != 2* ]]; then + echo " API ${url##*/}: HTTP ${http_code} — $(cat "${out}" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('message','?'))" 2>/dev/null || echo 'see above')" + return 1 + fi + return 0 +} + +# ── 1. find the benchmarks-trigger bridge ──────────────────────────────────── BRIDGES_FILE="${TMPDIR_LOCAL}/bridges.json" -curl -sf \ - --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ +echo "Querying bridges for pipeline ${CI_PIPELINE_ID}…" +if ! api_get \ "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/bridges" \ - > "${BRIDGES_FILE}" + "${BRIDGES_FILE}"; then + echo "Cannot read pipeline bridges (job token may lack Reporter access) — skipping download" + exit 0 +fi read -r BP_PROJECT_ID DOWNSTREAM_PIPELINE_ID < <(python3 - "${BRIDGES_FILE}" <<'PYEOF' import json, sys @@ -34,17 +52,20 @@ PYEOF ) if [ -z "${DOWNSTREAM_PIPELINE_ID:-}" ]; then - echo "No downstream BP pipeline found for benchmarks-trigger — skipping download" + echo "benchmarks-trigger bridge not found or did not run — skipping download" exit 0 fi echo "BP downstream pipeline: project=${BP_PROJECT_ID} pipeline=${DOWNSTREAM_PIPELINE_ID}" -# ── 2. list jobs in the downstream pipeline ── +# ── 2.
list jobs in the downstream pipeline ────────────────────────────────── JOBS_FILE="${TMPDIR_LOCAL}/jobs.json" -curl -sf \ - --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ +echo "Listing BP pipeline jobs…" +if ! api_get \ "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/pipelines/${DOWNSTREAM_PIPELINE_ID}/jobs?per_page=100" \ - > "${JOBS_FILE}" + "${JOBS_FILE}"; then + echo "Cannot list BP pipeline jobs — skipping download" + exit 0 +fi JOB_IDS=$(python3 -c " import json @@ -52,15 +73,15 @@ with open('${JOBS_FILE}') as f: print(' '.join(str(j['id']) for j in json.load(f))) ") -# ── 3. download result_*.json from each job's artifact zip ── +# ── 3. download result_*.json from each job's artifact zip ─────────────────── DOWNLOADED=0 for JOB_ID in ${JOB_IDS}; do ART_ZIP="${TMPDIR_LOCAL}/art_${JOB_ID}.zip" - if curl -sf \ + ART_STATUS=$(curl -s -o "${ART_ZIP}" -w "%{http_code}" \ --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ - "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/jobs/${JOB_ID}/artifacts" \ - --output "${ART_ZIP}" 2>/dev/null; then - # -j: junk paths (strip the artifacts/ prefix), quiet, overwrite + "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/jobs/${JOB_ID}/artifacts" 2>/dev/null) + if [[ "${ART_STATUS}" == 2* ]]; then + # -j: junk paths (strip artifacts/ prefix), -q: quiet (note: -o/overwrite is not passed below) if unzip -q -j "${ART_ZIP}" "artifacts/result_*.json" -d "${DEST}/" 2>/dev/null; then DOWNLOADED=$((DOWNLOADED + 1)) fi @@ -68,9 +89,9 @@ for JOB_ID in ${JOB_IDS}; do done RESULT_COUNT=$(find "${DEST}" -name "result_*.json" | wc -l) -echo "result_*.json files downloaded: ${RESULT_COUNT} (from ${DOWNLOADED} BP job(s))" +echo "result_*.json files: ${RESULT_COUNT} (from ${DOWNLOADED} BP job(s))" if [ "${RESULT_COUNT}" -eq 0 ]; then - echo "WARNING: no result JSONs found — BP jobs may not have run yet" >&2 + echo "WARNING: no result JSONs found — BP jobs may not have run or produced artifacts yet" exit 1 fi From 7580df27de5bad718f9ea06830053d29d3ed9302 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik
Date: Fri, 19 Jun 2026 08:56:20 +0200 Subject: [PATCH 7/8] fix: pass MAVEN_REPOSITORY_PROXY to BP trigger --- .gitlab/benchmarks/.gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/benchmarks/.gitlab-ci.yml b/.gitlab/benchmarks/.gitlab-ci.yml index e322fb131..318a62fa8 100644 --- a/.gitlab/benchmarks/.gitlab-ci.yml +++ b/.gitlab/benchmarks/.gitlab-ci.yml @@ -35,6 +35,7 @@ benchmarks-trigger: UPSTREAM_PROJECT_NAME: "java-profiler" UPSTREAM_BRANCH: "${CI_PIPELINE_ID}" UPSTREAM_PIPELINE_ID: "${CI_PIPELINE_ID}" + MAVEN_REPOSITORY_PROXY: "${MAVEN_REPOSITORY_PROXY}" trigger: project: DataDog/apm-reliability/benchmarking-platform branch: java-profiler From 7d9f6799161980c4ca9257450e604dc5e433fe7c Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 19 Jun 2026 09:49:24 +0200 Subject: [PATCH 8/8] fix: extend octo-sts trust to BP; wait for benchmarks-trigger before comment --- .github/chainguard/async-profiler-build.ci.sts.yaml | 4 ++-- .gitlab/benchmarks/.gitlab-ci.yml | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/chainguard/async-profiler-build.ci.sts.yaml b/.github/chainguard/async-profiler-build.ci.sts.yaml index 339ec1a03..8d3254f6f 100644 --- a/.github/chainguard/async-profiler-build.ci.sts.yaml +++ b/.github/chainguard/async-profiler-build.ci.sts.yaml @@ -1,7 +1,7 @@ -# Allow java-profiler GitLab CI to publish reports and manage issues +# Allow java-profiler and benchmarking-platform GitLab CI to post reports issuer: https://gitlab.ddbuild.io -subject_pattern: "project_path:DataDog/java-profiler:ref_type:branch:ref:.*" +subject_pattern: "project_path:(DataDog/java-profiler|DataDog/apm-reliability/benchmarking-platform):ref_type:branch:ref:.*" permissions: contents: write diff --git a/.gitlab/benchmarks/.gitlab-ci.yml b/.gitlab/benchmarks/.gitlab-ci.yml index 318a62fa8..18359cf02 100644 --- a/.gitlab/benchmarks/.gitlab-ci.yml +++ b/.gitlab/benchmarks/.gitlab-ci.yml @@ -52,6 +52,8 @@ 
post-benchmarks-pr-comment: needs: - job: prepare:start artifacts: true + - job: benchmarks-trigger + artifacts: false rules: - if: '$JDK_VERSION != null || $DEBUG_LEVEL != null || $HASH != null || $DOWNSTREAM != null' when: never @@ -75,7 +77,9 @@ publish-benchmark-gh-pages: id_tokens: DDOCTOSTS_ID_TOKEN: aud: dd-octo-sts - needs: [] + needs: + - job: benchmarks-trigger + artifacts: false # Serialize concurrent GH Pages pushes. publish-gh-pages.sh uses # 'git push --force'; two concurrent pushes race and the slower one # silently discards the faster one's history update.