From 24120257452069d5107c7b2b3a8a5252a4cdc46b Mon Sep 17 00:00:00 2001 From: Preetam Dwivedi Date: Thu, 4 Jun 2026 21:58:02 -0700 Subject: [PATCH] Harden CI security and fix the required-checks merge gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary ### Why? The `required-checks` aggregator had the same latent bug that let a failing PR merge in submitqueue (PR #151): it had no `if: always()` and no failure detection. When a needed job fails, GitHub *skips* the dependent `required-checks` job, and a skipped required status check is treated as "not failed" — so a PR with failing build/test/lint could merge. The CI also ran with floating action tags, ambient write-scoped credentials, and no automated guard against workflow-security regressions. This ports submitqueue's CI security and required-checks hardening to tango. ### What? - `required-checks`: add `if: always()` and a step that fails on any `failure`/`cancelled`/`skipped` result among its `needs`. Add `workflow-security` to the gated set. - New `workflow-security` job: actionlint + zizmor (SHA-smell, dangerous triggers, credential persistence, template injection) plus a guard that fails if any non-GITHUB_TOKEN secret is referenced on the untrusted-PR-code path. - SHA-pin all actions (checkout, setup-go, actionlint, zizmor) with version comments instead of floating tags. - `persist-credentials: false` on every checkout that runs untrusted PR code (build-and-test, dependencies, lint, workflow-security). - Top-level `permissions: contents: read` (least privilege) and a `concurrency` group that cancels superseded PR runs only. - `merge_group` trigger so the merge queue is gated by required-checks; draft PRs skip CI and run on `ready_for_review`. - `.github/dependabot.yml` (gomod, security-updates only), `.github/zizmor.yml` config, and a CODEOWNERS `/.github/` admin override so workflow changes require admin review. ## Test Plan - ✅ `actionlint .github/workflows/ci.yml` — clean - ✅ `zizmor --offline .github/workflows/ci.yml` — no findings - ✅ YAML parses for ci.yml, dependabot.yml, zizmor.yml - ✅ Secrets guard: fires on `secrets.MY_PAT`, allows `secrets.GITHUB_TOKEN` - Full online zizmor audit + the gate behavior run in CI on this PR. --- .github/CODEOWNERS | 5 + .github/dependabot.yml | 9 ++ .github/workflows/ci.yml | 247 ++++++++++++++++++++++++++++----------- .github/zizmor.yml | 13 +++ 4 files changed, 206 insertions(+), 68 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/zizmor.yml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 698d25f..a0480fb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,6 @@ * @uber/tango-admins @uber/tango-maintainers + +# CI workflows and other automation are privileged (secrets, write tokens). +# Require admin review for any change under .github/. CODEOWNERS is +# last-match-wins, so this overrides the `*` rule for these paths. +/.github/ @uber/tango-admins diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..ceae707 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "monthly" + # Disable scheduled version-update PRs; we only want security updates, + # which run independently of this limit. + open-pull-requests-limit: 0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c74f0f..218a623 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,9 @@ name: CI check +run-name: >- + ${{ github.event_name == 'pull_request' && format('PR-CI #{0}', github.event.pull_request.number) + || github.event_name == 'merge_group' && 'MergeQueue-CI' + || github.event_name == 'push' && 'Main-CI' + || 'CI' }} on: push: @@ -9,99 +14,205 @@ on: - opened - reopened - synchronize + - ready_for_review + merge_group: + +# Least privilege: jobs only read the repo. Any job needing more must opt in +# explicitly at the job level. +permissions: + contents: read + +# Cancel superseded runs for the same PR to save runner minutes. Never cancel +# in-progress runs for `push` (main) or `merge_group` — those must complete so +# the default branch and merge queue always have a definitive result. +concurrency: + group: ci-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: build-and-test: + name: Build and Test + if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v4 + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + # This job executes untrusted PR code (make build/test). Don't leave + # the GITHUB_TOKEN in the workspace git config while it runs. + persist-credentials: false - - name: Build all targets - run: make build + - name: Build all targets + run: make build - - name: Run all tests - run: make test + - name: Run all tests + run: make test - - name: Display test logs on failure - if: failure() - run: | - echo "=== Test logs ===" - find . -name "test.log" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No test logs found" + - name: Display test logs on failure + if: failure() + run: | + echo "=== Test logs ===" + find . -name "test.log" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No test logs found" dependencies: + name: Dependencies + if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - - - name: Run go mod tidy and bazel mod tidy - run: | - go mod tidy - ./tools/bazel mod tidy - - - name: Verify no uncommitted changes from mod tidy - run: | - if [ -n "$(git status --porcelain)" ]; then - echo "::error::Dependencies are out of date. Please run 'go mod tidy' and 'bazel mod tidy' locally and commit the results." - git diff - exit 1 - fi - - - name: Run gazelle - run: make gazelle - - - name: Verify no uncommitted changes from gazelle - run: | - if [ -n "$(git status --porcelain)" ]; then - echo "::error::BUILD.bazel files are out of date. Please run 'make gazelle' locally and commit the results." - git diff - exit 1 - fi + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + # This job executes untrusted PR code (go mod tidy, bazel/gazelle). + persist-credentials: false + + - name: Set up Go + uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 + with: + go-version-file: go.mod + + - name: Run go mod tidy and bazel mod tidy + run: | + go mod tidy + ./tools/bazel mod tidy + + - name: Verify no uncommitted changes from mod tidy + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "::error::Dependencies are out of date. Please run 'go mod tidy' and 'bazel mod tidy' locally and commit the results." + git diff + exit 1 + fi + + - name: Run gazelle + run: make gazelle + + - name: Verify no uncommitted changes from gazelle + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "::error::BUILD.bazel files are out of date. Please run 'make gazelle' locally and commit the results." + git diff + exit 1 + fi lint: + name: Lint + if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - - - name: Run gazelle - run: make gazelle - - - name: Run linters - run: | - gofmt -w . - go install golang.org/x/tools/cmd/goimports@latest - goimports -w . - - - name: Verify no uncommitted changes from linters - run: | - if [ -n "$(git status --porcelain)" ]; then - echo "::error::Code is not formatted. Please run 'make gazelle', 'gofmt -w .', and 'goimports -w .' locally and commit the results." - git diff - exit 1 - fi - + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + # This job executes untrusted PR code (gazelle, gofmt, goimports). + persist-credentials: false + + - name: Set up Go + uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 + with: + go-version-file: go.mod + + - name: Run gazelle + run: make gazelle + + - name: Run linters + run: | + gofmt -w . + go install golang.org/x/tools/cmd/goimports@latest + goimports -w . + + - name: Verify no uncommitted changes from linters + run: | + if [ -n "$(git status --porcelain)" ]; then + echo "::error::Code is not formatted. Please run 'make gazelle', 'gofmt -w .', and 'goimports -w .' locally and commit the results." + git diff + exit 1 + fi + + # --------------------------------------------------------------------------- + # WORKFLOW SECURITY LINT + # + # Guards against regressions in the workflows themselves: actionlint checks + # general validity; zizmor audits for GitHub Actions security smells + # (dangerous triggers, unpinned `uses:`, credential persistence, template + # injection). Keeps the SHA-pinning / persist-credentials hardening from + # silently eroding in future edits. + # --------------------------------------------------------------------------- + workflow-security: + name: Workflow Security Lint + if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false + - name: actionlint + uses: raven-actions/actionlint@205b530c5d9fa8f44ae9ed59f341a0db994aa6f8 # v2.1.2 + - name: zizmor + uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 + with: + # Pin the zizmor TOOL version (distinct from the action tag above) for + # reproducible audits matching local validation. + version: "1.25.2" + # Fail the job on findings without requiring GitHub Advanced Security + # / SARIF upload (which needs security-events: write and degrades on + # fork PRs). Keeps the gate self-contained. + advanced-security: false + + # ci.yml runs untrusted PR code under the `pull_request` trigger. A + # repository secret referenced anywhere on that path is reachable by a + # malicious PR and can be exfiltrated on the PR run (before any review), + # so this path must stay secret-free — route any secret-bearing step + # through a SEPARATE trusted workflow (workflow_run / pull_request_target) + # that does not execute PR code. + # + # This guard catches ACCIDENTAL reintroduction by honest contributors; a + # malicious actor controlling ci.yml could delete the guard itself, so the + # real defense remains CODEOWNERS review on .github/. GITHUB_TOKEN + # (least-privilege, read-only here) is allowlisted. + - name: Guard — no repository secrets on the untrusted-code path + run: | + hits="$(grep -rnE '\$\{\{[^}]*secrets\.' \ + .github/workflows/ci.yml \ + | grep -vE 'secrets\.GITHUB_TOKEN' || true)" + if [ -n "$hits" ]; then + echo "::error::Repository secret referenced on the untrusted-code CI path (ci.yml):" >&2 + echo "$hits" >&2 + echo "Move secret-bearing steps to a separate trusted workflow that does not run PR code." >&2 + exit 1 + fi + echo "OK: no repository secrets referenced in ci.yml." + + # --------------------------------------------------------------------------- + # REQUIRED CHECKS GATE + # + # Fan-in aggregator that must turn RED when any required job fails, is + # cancelled, or is skipped. `if: always()` is critical: without it, this job + # is skipped when any `needs` dependency fails, and GitHub treats a skipped + # required status check as "not failed" — which would let a PR merge through + # the merge queue despite failing checks. + # --------------------------------------------------------------------------- required-checks: name: Required Checks + if: ${{ always() && (github.event_name != 'pull_request' || github.event.pull_request.draft == false) }} runs-on: ubuntu-latest needs: - build-and-test - dependencies - lint + - workflow-security steps: + - name: Fail if any required check did not succeed + if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }} + # Pass the job-results context via env (not inline ${{ }} in the script) + # so there is no template expansion inside the run block. + env: + NEEDS_JSON: ${{ toJSON(needs) }} + run: | + echo "One or more required checks did not succeed:" >&2 + echo "$NEEDS_JSON" >&2 + exit 1 + - name: All required checks passed - run: echo "All required checks passed!" >&2 + run: echo "All required checks passed!" diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 0000000..baa9f0d --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,13 @@ +# zizmor (GitHub Actions security auditor) configuration. +# +# Every ignore below is an INTENTIONAL, reviewed exception with a concrete +# justification — not a blanket mute. New findings outside these are expected +# to fail the `workflow-security` CI job. +rules: + # dependabot.yml intentionally disables scheduled version-update PRs + # (open-pull-requests-limit: 0) and relies only on security updates, which + # should be applied promptly. A version-update cooldown would add nothing + # (no version PRs are opened) and we do not want to delay security fixes. + dependabot-cooldown: + ignore: + - dependabot.yml