Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ stages:
- integration-test
- reliability
- benchmarks
- post-benchmarks
- fuzz
- notify

Expand Down
107 changes: 33 additions & 74 deletions .gitlab/benchmarks/.gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ variables:
PREPARE_IMAGE: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest
DD_OCTO_STS_IMAGE: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1

.benchmark_job:
extends: .deploy-sa
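# Bridge job: triggers the BP (benchmarking-platform) pipeline and, because of
# `strategy: depend`, blocks until that pipeline completes.
# Bridge jobs cannot appear in other jobs' `needs:`, so jobs that consume the
# results rely on stage ordering instead (the post-benchmarks stage runs after
# the benchmarks stage). Stage ordering only applies to jobs that declare no
# `needs:` of their own; a job with `needs:` starts as soon as the listed jobs
# finish, regardless of stage.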
benchmarks-trigger:
stage: benchmarks
timeout: 6h
variables:
ITERATIONS: "${BENCHMARK_ITERATIONS:-1}"
MODES: "${BENCHMARK_MODES:-cpu,wall,alloc,memleak}"
needs:
- job: get-versions
artifacts: true
Expand All @@ -24,64 +22,27 @@ variables:
- if: '$CI_PIPELINE_SOURCE == "web"'
when: manual
allow_failure: true
# Run automatically and non-blocking on any other source (push/trigger/api/
# etc.) — mirrors the integration-test rules. The before_script CANCELLED
# gate skips branches with no open PR.
# Run automatically and non-blocking on any other source (push/trigger/api/etc.)
- when: on_success
allow_failure: true
script: |
# setup the env
export ARTIFACTS_DIR="$(pwd)/reports" && (mkdir "${ARTIFACTS_DIR}" || :)
export CANDIDATE_VERSION=${CURRENT_VERSION}
export BASELINE_VERSION=${PREVIOUS_VERSION}
export PLATFORM_DIR=".benchmarks/platform"

# check for missing candidate version
if [ -z "${CANDIDATE_VERSION}" ]; then echo "Missing candidate version. Skipping."; exit 0; fi

# fetch the common platform scripts
git -c url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf="https://github.com/DataDog/" \
clone --branch dd-trace-go https://github.com/DataDog/benchmarking-platform ${PLATFORM_DIR}

# apply the specific step scripts
cp -r .gitlab/benchmarks/steps/* ${PLATFORM_DIR}/steps/
chmod a+x ${PLATFORM_DIR}/steps/*

# check for mode validity
${PLATFORM_DIR}/steps/check_modes.sh
if [ "$(cat .job_status)" == "SKIP" ]; then exit 0; fi

# run benchmarks
${PLATFORM_DIR}/steps/capture-hardware-software-info.sh
${PLATFORM_DIR}/steps/run-benchmarks.sh
${PLATFORM_DIR}/steps/analyze-results.sh
${PLATFORM_DIR}/steps/upload-results-to-s3.sh
parallel:
matrix:
- RUN_MODE: ["cpu", "wall", "alloc", "memleak", "cpu,wall", "memleak,alloc", "cpu,wall,alloc,memleak"]
artifacts:
when: always
name: "reports"
paths:
- reports/
expire_in: 3 months

benchmarks-candidate-amd64:
extends: .benchmark_job
tags: ["arch:amd64"]
image: $BENCHMARK_IMAGE_AMD64

benchmarks-candidate-aarch64:
extends: .benchmark_job
tags: ["arch:arm64"]
image: $BENCHMARK_IMAGE_ARM64
variables:
KUBERNETES_MEMORY_REQUEST: 200Gi
KUBERNETES_MEMORY_LIMIT: 200Gi
CANDIDATE_VERSION: "${CURRENT_VERSION}"
BASELINE_VERSION: "${PREVIOUS_VERSION}"
BENCHMARK_ITERATIONS: "${BENCHMARK_ITERATIONS:-5}"
BENCHMARK_MODES: "${BENCHMARK_MODES:-cpu,wall,alloc,memleak}"
DDPROF_COMMIT_SHA: "${CI_COMMIT_SHA}"
DDPROF_COMMIT_BRANCH: "${CI_COMMIT_BRANCH}"
UPSTREAM_PROJECT_NAME: "java-profiler"
UPSTREAM_BRANCH: "${CI_PIPELINE_ID}"
UPSTREAM_PIPELINE_ID: "${CI_PIPELINE_ID}"
trigger:
project: DataDog/apm-reliability/benchmarking-platform
branch: java-profiler
strategy: depend

post-benchmarks-pr-comment:
extends: .retry-config
stage: benchmarks
stage: post-benchmarks
tags: ["arch:arm64"]
image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1
id_tokens:
Expand All @@ -90,47 +51,45 @@ post-benchmarks-pr-comment:
needs:
- job: prepare:start
artifacts: true
- job: benchmarks-candidate-amd64
artifacts: true
- job: benchmarks-candidate-aarch64
artifacts: true
rules:
- if: '$JDK_VERSION != null || $DEBUG_LEVEL != null || $HASH != null || $DOWNSTREAM != null'
when: never
- if: '$CI_PIPELINE_SOURCE == "schedule"'
when: never
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
when: never
# Always run when the candidate jobs ran, regardless of source, so results
# are posted back to the PR.
# Always run after the benchmarks stage so results are posted back to the PR.
- when: always
timeout: 5m
timeout: 10m
script:
- mkdir -p reports
- .gitlab/benchmarks/download-s3-reports.sh reports
- .gitlab/benchmarks/post-pr-comment.sh reports
allow_failure: true

publish-benchmark-gh-pages:
  # Publishes benchmark results to GitHub Pages for default-branch pipelines.
  stage: post-benchmarks
  tags: ["arch:arm64"]
  image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1
  id_tokens:
    DDOCTOSTS_ID_TOKEN:
      aud: dd-octo-sts
  # Deliberately no `needs:` here. `needs: []` would start this job as soon as
  # the pipeline is created, i.e. before the benchmarks stage has produced
  # anything to download. Without `needs:` the job waits for every earlier
  # stage, which is the ordering the benchmarks-trigger bridge relies on.
  #
  # Serialize concurrent GH Pages pushes. publish-gh-pages.sh uses
  # 'git push --force'; two concurrent pushes race and the slower one
  # silently discards the faster one's history update.
  resource_group: gh-pages-publish
  rules:
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: never
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH || $CI_COMMIT_BRANCH == "main"'
      when: always
  timeout: 15m
  script:
    - mkdir -p reports
    - .gitlab/benchmarks/download-s3-reports.sh reports
    - ./.gitlab/benchmarks/publish-gh-pages.sh reports
  allow_failure: true

include:
- local: .gitlab/common.yml
- local: .gitlab/benchmarks/images.yml
97 changes: 97 additions & 0 deletions .gitlab/benchmarks/download-s3-reports.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Downloads result JSONs from the BP downstream pipeline via the GitLab CI API.
#
# Requires only curl, unzip, and python3 (stdlib) — no aws CLI, pip, or boto3 needed.
# BP jobs already store artifacts in GitLab; this fetches them directly from
# the downstream pipeline triggered by benchmarks-trigger.
# -u: referencing an unset variable is an error; pipefail: a pipeline fails
# when any of its stages fails, not just the last one.
set -uo pipefail # intentionally no -e: we handle errors explicitly

# Directory that receives the extracted result_*.json files
# (first positional argument, "reports" when omitted).
DEST="${1:-reports}"
mkdir -p "${DEST}"

# Scratch directory for API responses and artifact zips; the EXIT trap removes
# it on every exit path of this script.
TMPDIR_LOCAL=$(mktemp -d)
trap 'rm -rf "${TMPDIR_LOCAL}"' EXIT

# ── helper: curl with explicit HTTP status checking ──────────────────────────
# Usage: api_get <url> <output_file>
# Fetches <url> with the CI job token and writes the response body to
# <output_file>.
# Returns 0 on 2xx, prints diagnostics and returns 1 otherwise.
api_get() {
  local url="$1" out="$2"
  local http_code detail
  # -sS: no progress meter, but transport errors (DNS, TLS, timeouts) are still
  # written to stderr, so an "HTTP 000" diagnostic has something to point at.
  http_code=$(curl -sS -o "${out}" -w "%{http_code}" \
    --header "JOB-TOKEN: ${CI_JOB_TOKEN}" "${url}")
  if [[ "${http_code}" != 2* ]]; then
    # Best effort: surface the "message" field of a JSON error body. A missing
    # file or a non-JSON body falls back to a fixed hint instead of failing.
    detail=$({ python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('message','?'))" \
      < "${out}"; } 2>/dev/null || echo 'see above')
    echo " API ${url##*/}: HTTP ${http_code} — ${detail}"
    return 1
  fi
  return 0
}

# ── 1. find the benchmarks-trigger bridge ────────────────────────────────────
# Ask GitLab for this pipeline's bridge jobs and pick out the downstream
# pipeline started by the one named "benchmarks-trigger".
BRIDGES_FILE="${TMPDIR_LOCAL}/bridges.json"
echo "Querying bridges for pipeline ${CI_PIPELINE_ID}…"
api_get \
  "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/bridges" \
  "${BRIDGES_FILE}" || {
  echo "Cannot read pipeline bridges (job token may lack Reporter access) — skipping download"
  exit 0
}

# The helper prints "<project_id> <pipeline_id>" for the matching bridge, or a
# blank pair when there is no usable downstream pipeline.
read -r BP_PROJECT_ID DOWNSTREAM_PIPELINE_ID < <(python3 - "${BRIDGES_FILE}" <<'PYEOF'
import json, sys

with open(sys.argv[1]) as fh:
    bridge_list = json.load(fh)

for bridge in bridge_list:
    if bridge.get("name") != "benchmarks-trigger":
        continue
    downstream = bridge.get("downstream_pipeline") or {}
    if downstream.get("id") and downstream.get("project_id"):
        print(downstream["project_id"], downstream["id"])
        sys.exit(0)
print("", "")
PYEOF
)

# Nothing to download when the bridge is absent or never started a pipeline.
[ -n "${DOWNSTREAM_PIPELINE_ID:-}" ] || {
  echo "benchmarks-trigger bridge not found or did not run — skipping download"
  exit 0
}
echo "BP downstream pipeline: project=${BP_PROJECT_ID} pipeline=${DOWNSTREAM_PIPELINE_ID}"

# ── 2. list jobs in the downstream pipeline ──────────────────────────────────
# Collects the job ids of the downstream pipeline into JOB_IDS
# (space-separated). JOB_IDS stays empty when the response cannot be parsed.
# NOTE(review): only one page of up to 100 jobs is requested; a larger
# downstream pipeline would be listed only partially.
JOBS_FILE="${TMPDIR_LOCAL}/jobs.json"
echo "Listing BP pipeline jobs…"
if ! api_get \
  "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/pipelines/${DOWNSTREAM_PIPELINE_ID}/jobs?per_page=100" \
  "${JOBS_FILE}"; then
  echo "Cannot list BP pipeline jobs — skipping download"
  exit 0
fi

# The file path is handed over as argv instead of being spliced into the
# Python source, so quotes or backslashes in the temp path cannot break the
# snippet (same approach as the bridge lookup).
JOB_IDS=$(python3 - "${JOBS_FILE}" <<'PYEOF'
import json, sys

with open(sys.argv[1]) as f:
    print(' '.join(str(j['id']) for j in json.load(f)))
PYEOF
)

# ── 3. download result_*.json from each job's artifact zip ───────────────────
# For every downstream job, fetch its artifact archive and extract only the
# result_*.json files into DEST. Jobs whose artifact request does not return
# 2xx, or whose archive holds no matching file, are skipped silently.
# DOWNLOADED counts the jobs that contributed at least one file.
DOWNLOADED=0
for JOB_ID in ${JOB_IDS}; do
  ART_ZIP="${TMPDIR_LOCAL}/art_${JOB_ID}.zip"
  ART_STATUS=$(curl -s -o "${ART_ZIP}" -w "%{http_code}" \
    --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \
    "${CI_API_V4_URL}/projects/${BP_PROJECT_ID}/jobs/${JOB_ID}/artifacts" 2>/dev/null)
  if [[ "${ART_STATUS}" == 2* ]]; then
    # -j: junk paths (strip artifacts/ prefix), -q: quiet, -o: overwrite.
    # -o matters: without it unzip asks for confirmation when a file of the
    # same name already exists in DEST, which cannot be answered in CI.
    if unzip -o -q -j "${ART_ZIP}" "artifacts/result_*.json" -d "${DEST}/" 2>/dev/null; then
      DOWNLOADED=$((DOWNLOADED + 1))
    fi
  fi
done

# Count what is actually present under DEST (not just what this run
# extracted) and report it alongside the number of contributing jobs.
RESULT_COUNT=$(find "${DEST}" -name "result_*.json" | wc -l)
echo "result_*.json files: ${RESULT_COUNT} (from ${DOWNLOADED} BP job(s))"

# An empty result set makes the script exit non-zero.
[ "${RESULT_COUNT}" -ne 0 ] || {
  echo "WARNING: no result JSONs found — BP jobs may not have run or produced artifacts yet"
  exit 1
}
83 changes: 79 additions & 4 deletions .gitlab/benchmarks/post-pr-comment.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env bash
# Post aggregated benchmark comparison results as a single PR comment.
# Post aggregated benchmark results as a single PR comment.
#
# Expects all per-cell comparison-baseline-vs-candidate_*.md reports to be
# present under REPORTS_DIR (default: reports/).
# Handles two report types found under REPORTS_DIR (default: reports/):
# - comparison-baseline-vs-candidate_*.md (perf comparison benchmarks)
# - result_${BENCHMARK}_${JDK}_${LIBRARY}.json (reliability benchmarks)
#
# Required env:
# DDPROF_COMMIT_BRANCH – branch name used to locate the open PR
Expand All @@ -14,7 +15,7 @@ set -euo pipefail
REPORTS_DIR="${1:-reports}"
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Aggregate all per-cell reports into a single comment body
# --- Perf comparison reports (markdown, one per benchmark cell) ---
SECTIONS=""
for md in "${REPORTS_DIR}"/comparison-baseline-vs-candidate_*.md; do
[ -f "${md}" ] || continue
Expand All @@ -28,6 +29,80 @@ $(cat "${md}")
"
done

# --- Reliability reports (JSON, one per benchmark×JDK×library) ---
RELIABILITY_TABLE=$(python3 - "${REPORTS_DIR}" <<'PYEOF'
import json, sys, glob, collections

# Directory holding the result_*.json files (first CLI argument).
reports_dir = sys.argv[1]
# (benchmark, jdk) -> {library: parsed result dict}
results = collections.defaultdict(dict)

for path in sorted(glob.glob(f"{reports_dir}/result_*.json")):
    try:
        with open(path) as f:
            r = json.load(f)
        # Read every required field before touching `results`: indexing the
        # defaultdict creates the (benchmark, jdk) entry, so a file without
        # "library" would otherwise leave an empty phantom row in the table.
        key = (r["benchmark"], r["jdk"])
        library = r["library"]
        results[key][library] = r
    except Exception as exc:
        # Stay best-effort, but say which file was dropped. stdout is the
        # table itself, so the warning goes to stderr.
        print(f"skipping {path}: {exc!r}", file=sys.stderr)
        continue

# No usable reports: emit nothing so the caller sees an empty table.
if not results:
    sys.exit(0)

def fmt_avg(r):
    """Render one result as '<status icon> <avg_ms> ms (<run_count> iters)'.

    The icon is 💥 when the result's "crashed" field is truthy and ✅
    otherwise; missing "avg_ms" / "run_count" fields are shown as 0.
    """
    status = "✅"
    if r.get("crashed"):
        status = "💥"
    return "{} {} ms ({} iters)".format(
        status, r.get("avg_ms", 0), r.get("run_count", 0)
    )

def fmt_delta(latest, dev):
    """Format the relative change of dev's avg_ms against latest's avg_ms.

    Returns "—" when either average is missing or zero. Otherwise returns a
    signed percentage with one decimal, prefixed with 🔴 when dev is more than
    2% slower and 🟢 when it is more than 2% faster.
    """
    la, da = latest.get("avg_ms", 0), dev.get("avg_ms", 0)
    if not la or not da:
        return "—"
    pct = (da - la) / la * 100
    # positive = dev is slower (potential regression)
    # The "+" comes from the ":+.1f" format spec alone; the marker must not
    # add a second one (the previous "🔴 +" prefix rendered "🔴 ++5.0%").
    if pct > 2:
        marker = "🔴 "
    elif pct < -2:
        marker = "🟢 "
    else:
        marker = ""
    return f"{marker}{pct:+.1f}%"

def fmt_uploads(r):
    """Return the result's "upload_count" as text, 0 when the field is absent."""
    count = r.get("upload_count", 0)
    return str(count)

def fmt_issues(r):
    """Summarise error and warning counts as '⚠️ E:<n> W:<n>'.

    Returns "—" when both "error_count" and "warn_count" are 0 (or absent);
    a count that is zero is left out of the summary.
    """
    errors = r.get("error_count", 0)
    warnings = r.get("warn_count", 0)
    if errors == 0 and warnings == 0:
        return "—"
    labelled = (("E", errors), ("W", warnings))
    tags = [f"{label}:{count}" for label, count in labelled if count]
    return "⚠️ " + " ".join(tags)

# Markdown table: a fixed header followed by one row per (benchmark, jdk)
# pair, comparing the "latest" and "dev" library results side by side.
header = [
    "### Reliability Benchmarks",
    "",
    "| Benchmark | JDK | Latest | Dev | Δ% (dev vs latest) | Uploads L/D | Issues L/D |",
    "|-----------|-----|--------|-----|-------------------|-------------|------------|",
]
rows = []
for bench, jdk in sorted(results):
    by_lib = results[(bench, jdk)]
    latest = by_lib.get("latest", {})
    dev = by_lib.get("dev", {})

    # A library with no result for this pair shows "—" in its timing column,
    # and the delta is only computed when both sides are present.
    cells = [
        bench,
        jdk,
        fmt_avg(latest) if latest else "—",
        fmt_avg(dev) if dev else "—",
        fmt_delta(latest, dev) if (latest and dev) else "—",
        f"{fmt_uploads(latest)} / {fmt_uploads(dev)}",
        f"{fmt_issues(latest)} / {fmt_issues(dev)}",
    ]
    rows.append("| " + " | ".join(str(cell) for cell in cells) + " |")

print("\n".join(header + rows))
PYEOF
) || true

[ -n "${RELIABILITY_TABLE}" ] && SECTIONS="${SECTIONS}
${RELIABILITY_TABLE}
"

if [ -z "${SECTIONS}" ]; then
echo "No benchmark reports found under ${REPORTS_DIR} — skipping comment"
exit 0
Expand Down
Loading