From a66695592bbb431be35525559aaa2f606efac74a Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 25 Jun 2026 09:13:10 -0700 Subject: [PATCH 1/5] CVE scan: improve PR failure reporting Keep the Trivy scan step green so GitHub opens the reporting step that prints the actionable CVE findings, then fail PR runs from that reporting step. Also remove the Spark 3.5 Jackson CVE ignore entries on this test branch so the PR run exercises the failure UI. Generated-by: GPT-5 Codex --- .../trivyignore/spark-runtime-3.5.trivyignore | 2 - .github/workflows/cve-scan.yml | 66 +++++++++++++++---- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/.github/trivyignore/spark-runtime-3.5.trivyignore b/.github/trivyignore/spark-runtime-3.5.trivyignore index a8f4671634c5..fea447e69977 100644 --- a/.github/trivyignore/spark-runtime-3.5.trivyignore +++ b/.github/trivyignore/spark-runtime-3.5.trivyignore @@ -23,5 +23,3 @@ # fixed version instead of ignoring the finding. # # jackson-databind CVEs that are only fixed in jackson >= 2.18.8. -CVE-2026-54512 -CVE-2026-54513 diff --git a/.github/workflows/cve-scan.yml b/.github/workflows/cve-scan.yml index 043d2378bd43..2e7c5a2b568b 100644 --- a/.github/workflows/cve-scan.yml +++ b/.github/workflows/cve-scan.yml @@ -88,9 +88,9 @@ jobs: # Trivy CVE scan — scans bundled jars for known vulnerabilities. # # Behaviour: - # - On PRs: the scan blocks CI if CVEs are found (exit-code 1). - # SARIF upload is skipped because GitHub's Security tab only - # accepts results from default/protected branches. + # - On PRs: Trivy generates SARIF and the reporting step blocks CI if + # CVEs are found. This keeps the GitHub UI focused on the actionable + # report rather than the SARIF-producing scan step. # - On push to main/release branches: the scan is informational # (exit-code 0) and results are uploaded as SARIF to the GitHub # Security tab for ongoing tracking. 
@@ -220,19 +220,63 @@ jobs: severity: 'HIGH,CRITICAL' trivyignores: ${{ matrix.trivyignores }} limit-severities-for-sarif: true - # Block PRs on CVE findings; on main/release branches report without failing - exit-code: ${{ github.event_name == 'pull_request' && '1' || '0' }} + # Let Trivy generate SARIF without failing this step. GitHub opens the failed + # step by default, so the reporting step below fails PRs after printing the + # actionable CVE details. + exit-code: '0' format: 'sarif' output: 'trivy-results.sarif' trivy-image: ${{ env.TRIVY_IMAGE }} - - name: Print Trivy scan results + - name: Report Trivy scan results if: always() + env: + FAIL_ON_FINDINGS: ${{ github.event_name == 'pull_request' && 'true' || 'false' }} run: | - if [ -f trivy-results.sarif ]; then - echo "## Trivy CVE Scan Results — ${{ matrix.distribution }}" - jq -r '.runs[].results[] | "- \(.ruleId): \(.message.text)"' trivy-results.sarif 2>/dev/null || echo "No findings or unable to parse SARIF." - else - echo "No SARIF file found — scan may have failed to install." + results_file="trivy-results.sarif" + summary="${GITHUB_STEP_SUMMARY:-/dev/null}" + + log() { + printf '%s\n' "$*" | tee -a "${summary}" + } + + escape_annotation() { + value="$1" + value="${value//'%'/%25}" + value="${value//$'\r'/%0D}" + value="${value//$'\n'/%0A}" + printf '%s' "${value}" + } + + if [ ! -f "${results_file}" ]; then + log "No SARIF file found — scan may have failed to run." + exit 1 + fi + + if ! findings="$(jq -r '.runs[].results[]? | "- \(.ruleId): \(.message.text)"' "${results_file}")"; then + log "Unable to parse Trivy SARIF results." + exit 1 + fi + + log "## Trivy CVE Scan Results — ${{ matrix.distribution }}" + + if [ -z "${findings}" ]; then + log "No HIGH or CRITICAL vulnerabilities found." + exit 0 + fi + + printf '%s\n' "${findings}" | tee -a "${summary}" + + if [ "${FAIL_ON_FINDINGS}" = "true" ]; then + # Surface findings in the PR checks UI, not just in the workflow logs. 
+ annotation_message="$( + printf '%s\n%s' \ + "Trivy found HIGH/CRITICAL vulnerabilities in ${{ matrix.distribution }}:" \ + "${findings}" + )" + annotation="$(escape_annotation "${annotation_message}")" + echo "::error title=Trivy CVE scan failed::${annotation}" + log "Failing because HIGH/CRITICAL vulnerabilities were found." + exit 1 fi - name: Upload Trivy results to GitHub Security tab if: always() && github.event_name == 'push' From eb3a4b439d873302015c2fefd418342b69526428 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 25 Jun 2026 09:23:34 -0700 Subject: [PATCH 2/5] CVE scan: format Trivy findings as a table Parse Trivy SARIF messages into a compact table and keep the PR annotation concise so the failed reporting step is easier to read. Generated-by: GPT-5 Codex --- .github/workflows/cve-scan.yml | 51 ++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cve-scan.yml b/.github/workflows/cve-scan.yml index 2e7c5a2b568b..0c7e87288c5f 100644 --- a/.github/workflows/cve-scan.yml +++ b/.github/workflows/cve-scan.yml @@ -239,6 +239,12 @@ jobs: printf '%s\n' "$*" | tee -a "${summary}" } + markdown_escape() { + value="$1" + value="${value//|/\\|}" + printf '%s' "${value}" + } + escape_annotation() { value="$1" value="${value//'%'/%25}" @@ -252,7 +258,24 @@ jobs: exit 1 fi - if ! findings="$(jq -r '.runs[].results[]? | "- \(.ruleId): \(.message.text)"' "${results_file}")"; then + if ! findings="$( + jq -r ' + def field($prefix): + (.message.text | split("\n") | map(select(startswith($prefix))) | first // "") + | ltrimstr($prefix) | if . == "" then "n/a" else . end; + + .runs[].results[]? + | [ + .ruleId, + field("Severity: "), + field("Package: "), + field("Installed Version: "), + field("Fixed Version: "), + field("Link: ") + ] + | @tsv + ' "${results_file}" + )"; then log "Unable to parse Trivy SARIF results."
exit 1 fi @@ -264,18 +287,34 @@ jobs: exit 0 fi - printf '%s\n' "${findings}" | tee -a "${summary}" + finding_count="$(printf '%s\n' "${findings}" | awk 'END { print NR }')" + finding_ids="$(printf '%s\n' "${findings}" | cut -f1 | awk 'BEGIN { sep="" } { printf "%s%s", sep, $0; sep=", " } END { print "" }')" + + log "Found ${finding_count} HIGH/CRITICAL vulnerabilities." + log "" + log "| CVE | Severity | Package | Installed | Fixed | Link |" + log "| --- | --- | --- | --- | --- | --- |" + + while IFS=$'\t' read -r cve severity package installed fixed link; do + cve="$(markdown_escape "${cve}")" + severity="$(markdown_escape "${severity}")" + package="$(markdown_escape "${package}")" + installed="$(markdown_escape "${installed}")" + fixed="$(markdown_escape "${fixed}")" + link="$(markdown_escape "${link}")" + log "| ${cve} | ${severity} | \`${package}\` | \`${installed}\` | ${fixed} | ${link} |" + done <<< "${findings}" if [ "${FAIL_ON_FINDINGS}" = "true" ]; then # Surface findings in the PR checks UI, not just in the workflow logs. annotation_message="$( - printf '%s\n%s' \ - "Trivy found HIGH/CRITICAL vulnerabilities in ${{ matrix.distribution }}:" \ - "${findings}" + printf '%s' \ + "Trivy found ${finding_count} HIGH/CRITICAL vulnerabilities in ${{ matrix.distribution }}: ${finding_ids}. See the 'Report Trivy scan results' step for details." )" annotation="$(escape_annotation "${annotation_message}")" echo "::error title=Trivy CVE scan failed::${annotation}" - log "Failing because HIGH/CRITICAL vulnerabilities were found." + log "" + log "Failing because ${finding_count} HIGH/CRITICAL vulnerabilities were found."
exit 1 fi - name: Upload Trivy results to GitHub Security tab From 60d31993ad0571eeb1435de88fe011398ccd93d2 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 25 Jun 2026 09:34:12 -0700 Subject: [PATCH 3/5] CVE scan: simplify Trivy report step Extract SARIF parsing and report rendering into small shell helpers so the report step is easier to read without changing behavior. Generated-by: GPT-5 Codex --- .github/workflows/cve-scan.yml | 53 ++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/.github/workflows/cve-scan.yml b/.github/workflows/cve-scan.yml index 0c7e87288c5f..51ee771090ab 100644 --- a/.github/workflows/cve-scan.yml +++ b/.github/workflows/cve-scan.yml @@ -253,12 +253,7 @@ jobs: printf '%s' "${value}" } - if [ ! -f "${results_file}" ]; then - log "No SARIF file found — scan may have failed to run." - exit 1 - fi - - if ! findings="$( + extract_findings() { jq -r ' def field($prefix): (.message.text | split("\n") | map(select(startswith($prefix))) | first // "") | ltrimstr($prefix) | if . == "" then "n/a" else . end; .runs[].results[]? | [ .ruleId, field("Severity: "), field("Package: "), field("Installed Version: "), field("Fixed Version: "), field("Link: ") ] | @tsv ' "${results_file}" - )"; then + } + + report_findings() { + log "Found ${finding_count} HIGH/CRITICAL vulnerabilities." + log "" + log "| CVE | Severity | Package | Installed | Fixed | Link |" + log "| --- | --- | --- | --- | --- | --- |" + + while IFS=$'\t' read -r cve severity package installed fixed link; do + cve="$(markdown_escape "${cve}")" + severity="$(markdown_escape "${severity}")" + package="$(markdown_escape "${package}")" + installed="$(markdown_escape "${installed}")" + fixed="$(markdown_escape "${fixed}")" + link="$(markdown_escape "${link}")" + log "| ${cve} | ${severity} | \`${package}\` | \`${installed}\` | ${fixed} | ${link} |" + done <<< "${findings}" + } + + if [ ! -f "${results_file}" ]; then + log "No SARIF file found — scan may have failed to run." + exit 1 + fi + + if ! findings="$(extract_findings)"; then log "Unable to parse Trivy SARIF results."
exit 1 fi @@ -290,27 +309,11 @@ jobs: finding_count="$(printf '%s\n' "${findings}" | awk 'END { print NR }')" finding_ids="$(printf '%s\n' "${findings}" | cut -f1 | awk 'BEGIN { sep="" } { printf "%s%s", sep, $0; sep=", " } END { print "" }')" - log "Found ${finding_count} HIGH/CRITICAL vulnerabilities." - log "" - log "| CVE | Severity | Package | Installed | Fixed | Link |" - log "| --- | --- | --- | --- | --- | --- |" - - while IFS=$'\t' read -r cve severity package installed fixed link; do - cve="$(markdown_escape "${cve}")" - severity="$(markdown_escape "${severity}")" - package="$(markdown_escape "${package}")" - installed="$(markdown_escape "${installed}")" - fixed="$(markdown_escape "${fixed}")" - link="$(markdown_escape "${link}")" - log "| ${cve} | ${severity} | \`${package}\` | \`${installed}\` | ${fixed} | ${link} |" - done <<< "${findings}" + report_findings if [ "${FAIL_ON_FINDINGS}" = "true" ]; then # Surface findings in the PR checks UI, not just in the workflow logs. - annotation_message="$( - printf '%s' \ - "Trivy found ${finding_count} HIGH/CRITICAL vulnerabilities in ${{ matrix.distribution }}: ${finding_ids}. See the 'Report Trivy scan results' step for details." - )" + annotation_message="Trivy found ${finding_count} HIGH/CRITICAL vulnerabilities in ${{ matrix.distribution }}: ${finding_ids}. See the 'Report Trivy scan results' step for details." annotation="$(escape_annotation "${annotation_message}")" echo "::error title=Trivy CVE scan failed::${annotation}" log "" From 713c8493692dab63628f96ecb43f6af589546396 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 25 Jun 2026 09:41:49 -0700 Subject: [PATCH 4/5] CVE scan: restore Spark 3.5 Trivy ignore Restore the Spark 3.5 Jackson CVE ignore entries that were removed only to exercise the PR failure UI. 
Generated-by: GPT-5 Codex --- .github/trivyignore/spark-runtime-3.5.trivyignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/trivyignore/spark-runtime-3.5.trivyignore b/.github/trivyignore/spark-runtime-3.5.trivyignore index fea447e69977..a8f4671634c5 100644 --- a/.github/trivyignore/spark-runtime-3.5.trivyignore +++ b/.github/trivyignore/spark-runtime-3.5.trivyignore @@ -23,3 +23,5 @@ # fixed version instead of ignoring the finding. # # jackson-databind CVEs that are only fixed in jackson >= 2.18.8. +CVE-2026-54512 +CVE-2026-54513 From 7e68d750a00abd2c02cc6dec68e58ac22c530563 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 25 Jun 2026 09:43:43 -0700 Subject: [PATCH 5/5] CVE scan: document PR and push behavior Clarify that Trivy always writes SARIF, PRs fail from the reporting step on findings, push runs keep findings informational, and missing or unparseable SARIF is still an error. Generated-by: GPT-5 Codex --- .github/workflows/cve-scan.yml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cve-scan.yml b/.github/workflows/cve-scan.yml index 51ee771090ab..6f2e5b2b9e7e 100644 --- a/.github/workflows/cve-scan.yml +++ b/.github/workflows/cve-scan.yml @@ -88,12 +88,13 @@ jobs: # Trivy CVE scan — scans bundled jars for known vulnerabilities. # # Behaviour: - # - On PRs: Trivy generates SARIF and the reporting step blocks CI if - # CVEs are found. This keeps the GitHub UI focused on the actionable - # report rather than the SARIF-producing scan step. - # - On push to main/release branches: the scan is informational - # (exit-code 0) and results are uploaded as SARIF to the GitHub - # Security tab for ongoing tracking. + # - Trivy always writes SARIF with exit-code 0. The reporting step owns + # finding-based failures so GitHub opens the step with actionable details. + # - On PRs: the reporting step blocks CI if HIGH/CRITICAL CVEs are found. 
+ # - On push to main/release branches and release tags: findings are + # informational, then SARIF is uploaded to the GitHub Security tab. + # - Missing or unparseable SARIF is still a failure on every event because + # it means the scan did not produce usable results. # ------------------------------------------------------------------ cve-scan: runs-on: ubuntu-24.04 @@ -220,9 +221,9 @@ jobs: severity: 'HIGH,CRITICAL' trivyignores: ${{ matrix.trivyignores }} limit-severities-for-sarif: true - # Let Trivy generate SARIF without failing this step. GitHub opens the failed - # step by default, so the reporting step below fails PRs after printing the - # actionable CVE details. + # Let Trivy generate SARIF without failing on findings. GitHub opens the + # failed step by default, so PRs fail later in the reporting step that + # prints the actionable CVE details. exit-code: '0' format: 'sarif' output: 'trivy-results.sarif' @@ -230,6 +231,8 @@ jobs: - name: Report Trivy scan results if: always() env: + # PRs block on findings; push runs report findings without failing so + # SARIF can be uploaded to GitHub Security for tracking. FAIL_ON_FINDINGS: ${{ github.event_name == 'pull_request' && 'true' || 'false' }} run: | results_file="trivy-results.sarif"