diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml index 59aaf6c..ebaae12 100644 --- a/.github/workflows/review-pr.yml +++ b/.github/workflows/review-pr.yml @@ -80,6 +80,20 @@ permissions: id-token: write actions: read # download-artifact across workflow_run boundary +# Rate-anomaly safeguard: serialize same-trigger bursts on a PR (e.g. rapid +# force-pushes firing repeated auto-reviews) instead of running N in parallel. +# The group is keyed per PR AND per trigger intent: the comment id (or event +# name) suffix keeps distinct comments/replies in distinct groups, so a quick +# conversational reply is never queued behind a 45-minute review. cancel-in- +# progress is false so an in-flight review/reply is never killed mid-post. +# Per-PR request *frequency* is enforced by the rate-limit check below, and the +# in-action cache lock (review-pr/action.yml) prevents concurrent reviews; the +# workflow_run/fork path (PR number only in the artifact) falls back to a per-run +# group, where those two mechanisms still bound abuse. +concurrency: + group: pr-review-${{ github.event.pull_request.number || github.event.issue.number || inputs.pr-number || github.run_id }}-${{ github.event.comment.id || github.event_name }} + cancel-in-progress: false + jobs: resolve-context: if: inputs.trigger-run-id != '' @@ -315,10 +329,36 @@ jobs: REQUESTER: ${{ github.event.sender.login }} run: node "$DOCKER_AGENT_ACTION_ROOT/dist/check-org-membership.js" + # Rate-anomaly safeguard: count how many docker-agent reviews and replies + # landed on this PR in the recent window (full reviews via the Reviews API + # plus marker-bearing reply comments). An authorized account can still drive + # the bot at high frequency (each request costs an LLM run), so a burst above + # the threshold is flagged and the expensive review is skipped. Runs BEFORE + # "Create check run" so a throttled request creates no check run (a skipped + # review must not surface as a green "PR Review" check). + # + # Scope: this gate guards the review run only. The conversational reply jobs + # (reply-to-feedback, reply-to-mention) are intentionally left unthrottled + # (see the note on each). The window count itself already spans reviews AND + # replies, so heavy reply traffic still throttles subsequent review runs. + - name: Check rate anomaly + id: rate + if: | + steps.membership.outputs.is_member == 'true' && + steps.command.outputs.is_review != 'false' && + steps.draft.outputs.skip != 'true' + continue-on-error: true # fail-open: a rate-check error must not block reviews + shell: bash + env: + GITHUB_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }} + RATE_PR_NUMBER: ${{ steps.pr.outputs.number }} + run: node "$DOCKER_AGENT_ACTION_ROOT/dist/rate-limit.js" + - name: Create check run if: | (steps.pr.outputs.source == 'event' || steps.pr.outputs.source == 'trigger') && - steps.membership.outputs.is_member == 'true' + steps.membership.outputs.is_member == 'true' && + steps.rate.outputs.anomalous != 'true' id: create-check continue-on-error: true uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 @@ -349,7 +389,8 @@ jobs: if: | steps.membership.outputs.is_member == 'true' && steps.command.outputs.is_review != 'false' && - steps.draft.outputs.skip != 'true' + steps.draft.outputs.skip != 'true' && + steps.rate.outputs.anomalous != 'true' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 @@ -359,7 +400,8 @@ jobs: if: | steps.membership.outputs.is_member == 'true' && steps.command.outputs.is_review != 'false' && - steps.draft.outputs.skip != 'true' + steps.draft.outputs.skip != 'true' && + steps.rate.outputs.anomalous != 'true' id: run-review continue-on-error: true uses: docker/docker-agent-action/review-pr@e96a4bb40cac114f64358621e1d08346c8eadc8c # v2.0.1 @@ -404,6 +446,12 @@ jobs: core.warning(`Failed to update check run: ${error.message}`); } + # No rate-anomaly gate here (unlike the review job): conversational replies are + # gated by org membership and serialized per-PR by the `concurrency:` group, and + # each reply requires a distinct human comment to trigger it, so they cannot be + # fanned out the way review requests can. The rate-limit window count does still + # include these replies, so a reply burst throttles subsequent review runs. + # Extending the same gate to the reply jobs is a deferred follow-up. reply-to-feedback: needs: [resolve-context] if: | @@ -746,6 +794,10 @@ jobs: path: feedback/ retention-days: 90 + # No rate-anomaly gate here, same rationale as reply-to-feedback above: + # org-gated, per-PR-serialized, and trigger-bound to a human comment. The + # rate-limit window count includes these replies, so they still feed the + # review job's throttle. Gating the reply jobs themselves is a deferred follow-up. reply-to-mention: needs: [resolve-context] if: | diff --git a/.github/workflows/self-review-pr-trigger.yml b/.github/workflows/self-review-pr-trigger.yml index ddd0c46..8a98a7f 100644 --- a/.github/workflows/self-review-pr-trigger.yml +++ b/.github/workflows/self-review-pr-trigger.yml @@ -8,6 +8,15 @@ on: pull_request_review_comment: types: [ created ] +# Rate-anomaly safeguard: collapse a burst of review_requested events on the same +# PR at the source — re-requesting a review is redundant, so cancel-in-progress +# drops superseded context-capture runs and only the latest fans out to a review. +# Distinct review comments stay independent (the comment id keys them into their +# own group) so a real reply is never dropped by another comment on the same PR. +concurrency: + group: pr-review-trigger-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.comment.id || 'review-request' }} + cancel-in-progress: true + permissions: {} jobs: diff --git a/SECURITY.md b/SECURITY.md index 4f53f98..0580771 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -26,6 +26,8 @@ This action includes **built-in security features for all agent executions**: - **Suspicious patterns** (strip + warn): Behavioral/natural-language injection attempts ("ignore previous instructions", "system mode", "reveal the token", base64/hex obfuscation, etc.) — matching lines are removed from the prompt before it reaches the agent - **Medium-risk patterns** (warn only): API key variable names in configuration (`ANTHROPIC_API_KEY`, `GITHUB_TOKEN`, etc.) +4. **Review-Request Abuse Safeguards** — the comment/event-triggered PR review pipeline is hardened against abuse on every trigger path: org-membership validation and rate-anomaly throttling. See [Review-Request Abuse Safeguards](#review-request-abuse-safeguards). + ## Security Architecture The action implements a defense-in-depth approach: @@ -69,6 +71,55 @@ The action implements a defense-in-depth approach: └────────────────────────────────────────────────────────────────┘ ``` +## Review-Request Abuse Safeguards + +The PR review pipeline (`.github/workflows/review-pr.yml` and the `review-pr/` +composite actions) is comment- and event-triggered, which makes it the main +abuse vector for cost/spam. Two safeguards harden the review-request flow on +every trigger path (`review_requested`, the deprecated `/review` comment, +`@docker-agent` mentions, automatic `opened`/`synchronize`, and the +`workflow_run` fork path). + +### 1. Review requests come from org members + +Membership is verified before any review work runs, and which actor is checked +depends on the trigger: + +| Trigger | Actor verified | Mechanism | +|---------|----------------|-----------| +| `/review` comment, `@docker-agent` mention, reply-to-feedback | The commenter | `check-org-membership` against the `docker` org (OIDC `org-membership-token`) | +| Automatic review (`opened`/`synchronize`/`ready_for_review`) | The PR author | `check-org-membership` resolves the PR author live via the GitHub API | +| `review_requested` via the PR sidebar | The requester | The requesting org member is verified, so an external contributor's PR can be reviewed on request. The requester is taken only from a trusted source (`github.event.sender.login` on the direct same-repo path, re-derived from the PR timeline on the fork/`workflow_run` path), never from the trigger artifact. Also relies on **GitHub-native enforcement** (only users with triage/write access can request a reviewer) and gates on `requested_reviewer.login == 'docker-agent'` | + +The membership check **fails closed**: if no actor login can be resolved, or the +actor is not an org member, the review is skipped. `src/check-org-membership` +evaluates the authorization paths in order (the PR author for automatic review, +then the trusted requester for `review_requested`), resolving the PR author live +via the API so the directly-wired `pull_request` path verifies the author +instead of an empty comment author. + +### 2. Rate anomalies are detected and throttled + +Authorization gates *who* may trigger a review, not *how often*. Three layers +bound request frequency: + +- **Per-PR `concurrency:` groups** on `review-pr.yml` and the trigger workflow + collapse same-trigger bursts (e.g. rapid force-pushes, repeated review + requests) instead of running them in parallel. Groups are scoped per trigger + intent so a quick conversational reply is never queued behind a long review. +- **`src/rate-limit`** counts docker-agent review/reply outputs on the PR within + a sliding window (default 600 s) and, when the count crosses a threshold + (default 8), flags a rate anomaly. It counts one unit per LLM run: full reviews + via the Reviews API (`pulls.listReviews`, by bot author, covering findings, + zero-finding APPROVEs, and timeout/error/LGTM fallbacks, none of which carry an + inline marker) plus marker-bearing reply comments. The review job skips the + expensive review on a flagged anomaly; the conversational reply jobs are not + gated (they are org-gated and per-PR serialized), though their replies still + count toward the window. The check **fails open** so an API error never blocks a + legitimate review. +- The existing in-action **cache lock** (`pr-review-lock---*`, 600 s + TTL) prevents concurrent reviews from racing on the same PR. + ## Security Modules All security logic lives under `src/security/` and is compiled into `dist/security.js` by diff --git a/review-pr/README.md b/review-pr/README.md index c5f12a6..8ae7391 100644 --- a/review-pr/README.md +++ b/review-pr/README.md @@ -191,7 +191,8 @@ with: > > 1. **Verifies org membership** before every review. Auto-review checks the **PR author** (so only org members' PRs are reviewed automatically); a **requested review** checks the **requester**, so a maintainer can pull an external contributor's PR into review on demand; `/review` checks the commenter > 2. **Prevents bot cascades** — replies from bots (except `docker-agent`) are ignored -> 3. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status +> 3. **Throttles rate anomalies** — per-PR `concurrency:` groups collapse same-trigger bursts, and a rate-limit check skips the review when too many requests land on one PR in a short window +> 4. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status ### What you don't need to add @@ -204,6 +205,7 @@ The workflow YAML examples above are the complete, recommended setup. The reusab | **PR vs issue comment** | The reusable workflow checks `github.event.issue.pull_request` internally. Plain issue comments on non-PR issues are silently ignored. | | **Draft PR skipping** | Draft PRs are skipped internally — no caller condition needed. | | **Concurrent review guard** | A cache-based lock (`pr-review-lock---*`) prevents duplicate reviews from racing on the same PR. | +| **Rate-anomaly throttling** | Per-PR `concurrency:` groups serialize same-trigger bursts, and a rate-limit check skips the review when too many docker-agent reviews and replies land on one PR within the window. No caller configuration needed. | **The only decision callers make** is which setup pattern to use: 1-workflow for same-repo PRs, 2-workflow for repos that accept fork PRs. That distinction is the caller's responsibility because it controls which event path delivers OIDC credentials to the reusable workflow. diff --git a/src/post-mention-reply/index.ts b/src/post-mention-reply/index.ts index abcc126..620afd9 100644 --- a/src/post-mention-reply/index.ts +++ b/src/post-mention-reply/index.ts @@ -101,8 +101,9 @@ export async function run(config: PostMentionReplyConfig): Promise { const prNum = parseInt(prNumber, 10); // Guard 7: dedup check (inline only — top-level has no dedup, see C1). - // Workflow-level concurrency lock and should-reply guards prevent double-posting - // for top-level mentions; only inline threads need per-thread deduplication. + // Top-level mentions rely on the per-PR `concurrency:` group in review-pr.yml + // (which serializes runs for one PR) plus the should-reply guards to bound + // double-posting; only inline threads need this per-thread deduplication. if (isInline) { let isDuplicate = false; try { diff --git a/src/rate-limit/__tests__/rate-limit.test.ts b/src/rate-limit/__tests__/rate-limit.test.ts new file mode 100644 index 0000000..320de36 --- /dev/null +++ b/src/rate-limit/__tests__/rate-limit.test.ts @@ -0,0 +1,346 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +import * as core from '@actions/core'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('@actions/core'); + +const { + mockPaginate, + mockPaginateIterator, + mockListComments, + mockListReviewComments, + mockListReviews, + MockOctokit, +} = vi.hoisted(() => { + const mockListComments = { endpoint: 'issues.listComments' }; + const mockListReviewComments = { endpoint: 'pulls.listReviewComments' }; + const mockListReviews = { endpoint: 'pulls.listReviews' }; + const mockPaginateIterator = vi.fn(); + const mockPaginate = Object.assign(vi.fn(), { iterator: mockPaginateIterator }); + + class MockOctokit { + paginate = mockPaginate; + rest = { + issues: { listComments: mockListComments }, + pulls: { listReviewComments: mockListReviewComments, listReviews: mockListReviews }, + }; + } + return { + mockPaginate, + mockPaginateIterator, + mockListComments, + mockListReviewComments, + mockListReviews, + MockOctokit, + }; +}); + +vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit })); + +import { detectRateAnomaly, main } from '../index.js'; + +const NOW = Date.parse('2026-06-24T10:10:00.000Z'); +const within = (secAgo: number) => new Date(NOW - secAgo * 1000).toISOString(); + +const BOT = 'docker-agent'; +const REVIEW_MARKER = ''; +const REPLY_MARKER = ''; + +// A conversational reply (issue comment or inline review-comment reply): one per +// reply LLM run, identified by the reply marker. +function reply(secAgo: number, marker = REPLY_MARKER, login = BOT) { + return { user: { login }, body: `Reply ${marker}`, created_at: within(secAgo) }; +} + +// A full review posted via the Reviews API: one per review LLM run, identified by +// bot author plus a non-empty assessment/status body (no inline marker). +function review(secAgo: number, body = '### Assessment: 🟢 APPROVE', login = BOT) { + return { user: { login }, body, submitted_at: within(secAgo) }; +} + +// Route paginate() and paginate.iterator() to the right dataset based on endpoint. +function routePaginate(issue: unknown[], reviewComments: unknown[], reviews: unknown[]) { + mockPaginate.mockImplementation((endpoint: unknown) => { + if (endpoint === mockListReviewComments) return Promise.resolve(reviewComments); + return Promise.resolve(issue); + }); + mockPaginateIterator.mockImplementation(async function* () { + yield { data: reviews }; + }); +} + +beforeEach(() => { + vi.clearAllMocks(); +}); + +describe('detectRateAnomaly', () => { + const base = { + owner: 'docker', + repo: 'repo', + prNumber: 5, + windowSeconds: 600, + threshold: 3, + botLogin: BOT, + nowMs: NOW, + }; + + it('counts full reviews plus reply comments within the window', async () => { + routePaginate( + [reply(120)], // top-level reply (issue comment) + [reply(60)], // inline review-comment reply + [review(30), review(90)], // two full review runs + ); + + const r = await detectRateAnomaly('tok', base); + + expect(r.count).toBe(4); + expect(r.anomalous).toBe(true); + expect(r.threshold).toBe(3); + }); + + it('counts a zero-finding APPROVE review (no marker, non-empty body)', async () => { + // Regression: review bodies have no inline marker and zero-finding reviews + // post no inline comments, so this is invisible to the comment endpoints. + routePaginate([], [], [review(30, '### Assessment: 🟢 APPROVE')]); + const r = await detectRateAnomaly('tok', { ...base, threshold: 1 }); + expect(r.count).toBe(1); + expect(r.anomalous).toBe(true); + }); + + it('counts timeout / error / LGTM fallback reviews (no marker)', async () => { + routePaginate( + [], + [], + [ + review(30, '⏱️ **PR Review Timed Out** — …'), + review(60, '❌ **PR Review Failed** — …'), + review(90, '🟢 **No issues found** — LGTM!'), + ], + ); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(3); + expect(r.anomalous).toBe(true); + }); + + it('is not anomalous below the threshold', async () => { + routePaginate([reply(120)], [], [review(30)]); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(2); + expect(r.anomalous).toBe(false); + }); + + it('does not double-count a review by also counting its inline finding comments', async () => { + // A findings review is one run: the review object (counted) carries N inline + // comments with the review marker (NOT counted — they are part of that run). + routePaginate( + [], + [reply(40, REVIEW_MARKER), reply(40, REVIEW_MARKER), reply(40, REVIEW_MARKER)], + [review(40, '### Assessment: 🟡 NEEDS ATTENTION')], + ); + const r = await detectRateAnomaly('tok', { ...base, threshold: 1 }); + expect(r.count).toBe(1); + }); + + it('ignores empty-body review entries (standalone inline comments/replies)', async () => { + // Inline comments/replies surface in listReviews as empty-body review entries; + // they must not be counted as review runs. + routePaginate([], [], [review(30, ''), review(60, ' ')]); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(0); + expect(r.anomalous).toBe(false); + }); + + it('ignores reviews and replies outside the window', async () => { + routePaginate([reply(2000 /* 33min ago */)], [], [review(30), review(2000 /* outside 600s */)]); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(1); + }); + + it('ignores reviews and comments from other users', async () => { + routePaginate( + [{ user: { login: 'mallory' }, body: `spam ${REPLY_MARKER}`, created_at: within(10) }], + [], + [review(20, '### Assessment: 🟢 APPROVE', 'mallory')], + ); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(0); + expect(r.anomalous).toBe(false); + }); + + it('matches the docker-agent[bot] App-token identity', async () => { + routePaginate( + [reply(60, REPLY_MARKER, 'docker-agent[bot]')], + [], + [review(30, '### Assessment: 🟢 APPROVE', 'docker-agent[bot]')], + ); + const r = await detectRateAnomaly('tok', { ...base, threshold: 2 }); + expect(r.count).toBe(2); + expect(r.anomalous).toBe(true); + }); + + it('ignores agent reply comments that lack a reply marker (ordinary chatter)', async () => { + routePaginate( + [{ user: { login: BOT }, body: 'just a plain comment', created_at: within(10) }], + [], + [], + ); + const r = await detectRateAnomaly('tok', base); + expect(r.count).toBe(0); + }); + + it('counts the legacy cagent reply marker during the migration window', async () => { + routePaginate( + [{ user: { login: BOT }, body: 'old ', created_at: within(10) }], + [], + [], + ); + const r = await detectRateAnomaly('tok', { ...base, threshold: 1 }); + expect(r.count).toBe(1); + expect(r.anomalous).toBe(true); + }); + + it('passes a since timestamp to the comment endpoints', async () => { + routePaginate([], [], []); + await detectRateAnomaly('tok', base); + const issueCall = mockPaginate.mock.calls.find((c) => c[0] === mockListComments); + expect(issueCall?.[1]).toMatchObject({ + owner: 'docker', + repo: 'repo', + issue_number: 5, + since: new Date(NOW - 600 * 1000).toISOString(), + }); + }); + + it('queries listReviews via paginate.iterator without a since parameter (API has none)', async () => { + routePaginate([], [], []); + await detectRateAnomaly('tok', base); + const iteratorCall = mockPaginateIterator.mock.calls.find((c) => c[0] === mockListReviews); + expect(iteratorCall?.[1]).toMatchObject({ owner: 'docker', repo: 'repo', pull_number: 5 }); + expect(iteratorCall?.[1]).not.toHaveProperty('since'); + }); + + it('throws RangeError for non-positive windowSeconds', async () => { + routePaginate([], [], []); + await expect(detectRateAnomaly('tok', { ...base, windowSeconds: 0 })).rejects.toThrow( + RangeError, + ); + await expect(detectRateAnomaly('tok', { ...base, windowSeconds: -1 })).rejects.toThrow( + RangeError, + ); + await expect(detectRateAnomaly('tok', { ...base, windowSeconds: NaN })).rejects.toThrow( + RangeError, + ); + }); + + it('throws RangeError for non-positive threshold', async () => { + routePaginate([], [], []); + await expect(detectRateAnomaly('tok', { ...base, threshold: 0 })).rejects.toThrow(RangeError); + await expect(detectRateAnomaly('tok', { ...base, threshold: -1 })).rejects.toThrow(RangeError); + }); +}); + +describe('main()', () => { + const mockedSetOutput = vi.mocked(core.setOutput); + + // main() reads the clock via Date.now() (it cannot take an injected nowMs), + // so pin the system clock to NOW to keep the mocked comment timestamps inside + // the sliding window. + beforeEach(() => { + vi.unstubAllEnvs(); + vi.useFakeTimers(); + vi.setSystemTime(NOW); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('fails open and emits all four outputs when token is missing', async () => { + vi.stubEnv('GITHUB_TOKEN', ''); + vi.stubEnv('GH_TOKEN', ''); + vi.stubEnv('RATE_PR_NUMBER', '5'); + vi.stubEnv('RATE_WINDOW_SECONDS', '300'); + vi.stubEnv('RATE_MAX_REQUESTS', '4'); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '0'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '300'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '4'); + }); + + it('fails open and emits all four outputs when PR number is missing', async () => { + vi.stubEnv('GITHUB_TOKEN', 'tok'); + vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo'); + vi.stubEnv('RATE_PR_NUMBER', 'not-a-number'); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '0'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '600'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8'); + }); + + it('fails open and emits all four outputs when GITHUB_REPOSITORY is invalid', async () => { + vi.stubEnv('GITHUB_TOKEN', 'tok'); + vi.stubEnv('GITHUB_REPOSITORY', 'no-slash-here'); + vi.stubEnv('RATE_PR_NUMBER', '5'); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '0'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '600'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8'); + }); + + it('emits all four outputs on a successful non-anomalous run', async () => { + vi.stubEnv('GITHUB_TOKEN', 'tok'); + vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo'); + vi.stubEnv('RATE_PR_NUMBER', '5'); + vi.stubEnv('RATE_WINDOW_SECONDS', '600'); + vi.stubEnv('RATE_MAX_REQUESTS', '8'); + routePaginate([], [], []); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '0'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '600'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8'); + }); + + it('emits all four outputs on a successful anomalous run', async () => { + vi.stubEnv('GITHUB_TOKEN', 'tok'); + vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo'); + vi.stubEnv('RATE_PR_NUMBER', '5'); + vi.stubEnv('RATE_WINDOW_SECONDS', '600'); + vi.stubEnv('RATE_MAX_REQUESTS', '2'); + routePaginate([reply(60), reply(120)], [], []); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'true'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '2'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '600'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '2'); + }); + + it('fails open and emits all four outputs when the API throws', async () => { + vi.stubEnv('GITHUB_TOKEN', 'tok'); + vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo'); + vi.stubEnv('RATE_PR_NUMBER', '5'); + mockPaginate.mockRejectedValue(new Error('API error')); + + await main(); + + expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false'); + expect(mockedSetOutput).toHaveBeenCalledWith('count', '0'); + expect(mockedSetOutput).toHaveBeenCalledWith('window', '600'); + expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8'); + }); +}); diff --git a/src/rate-limit/index.ts b/src/rate-limit/index.ts new file mode 100644 index 0000000..9b04153 --- /dev/null +++ b/src/rate-limit/index.ts @@ -0,0 +1,272 @@ +// Copyright The Docker Agent Action authors +// SPDX-License-Identifier: Apache-2.0 + +/** + * rate-limit — detect (and let the workflow prevent) abnormally frequent review + * activity on a single pull request. + * + * The existing per-PR cache lock (review-pr/action.yml) only stops *concurrent* + * reviews and only on the review path; an authorized account can still drive the + * bot at high frequency (each request costs an LLM run). This module adds a + * frequency check: it counts how many docker-agent review/reply outputs landed + * on the PR within a recent time window and flags the request as a rate anomaly + * when that count crosses a threshold. The workflow gates the expensive review + * step on the result, so a burst is throttled rather than run N times. + * + * Counting is per LLM run, so each run contributes exactly one unit: + * - Reviews are posted via the Reviews API (POST /pulls/{n}/reviews) with no + * inline marker — a findings review, a zero-finding APPROVE, and the + * timeout/error/LGTM fallbacks all land there. They are counted from + * `pulls.listReviews` by bot author (a real review run always carries an + * assessment/status body); the inline finding comments such a review carries + * are deliberately not counted, since that would be N units per single run. + * - Replies are posted as issue comments or inline review-comment replies, + * each carrying a `-reply` marker, and are counted from the comment + * endpoints (one marker per reply run). + * Both signals are observable with the repo-scoped token already present and + * together measure how hard the bot is being driven on that PR. + * + * Exported: + * detectRateAnomaly(token, opts) → RateAnomalyResult + * + * CLI (invoked via dist/rate-limit.js): inputs from environment variables: + * GITHUB_TOKEN / GH_TOKEN Token with pull-request read scope + * GITHUB_REPOSITORY "owner/repo" + * RATE_PR_NUMBER PR number + * RATE_WINDOW_SECONDS Sliding window in seconds (default 600) + * RATE_MAX_REQUESTS Anomaly threshold (default 8) + * RATE_BOT_LOGIN Bot login whose comments are counted (default "docker-agent") + * Outputs (via @actions/core.setOutput): anomalous ("true"|"false"), count, window, threshold. + */ +import * as core from '@actions/core'; +import { Octokit } from '@octokit/rest'; + +// Reply markers identify the bot's conversational replies — one per reply LLM +// run — posted as issue comments or inline review-comment replies. Full reviews +// carry no marker on the review body and are counted separately (see +// detectRateAnomaly), so the review/finding markers are intentionally absent +// here: counting them would double-count a review run that already shows up via +// the Reviews API. The legacy cagent-* marker keeps older reply threads +// countable during migration. +const REPLY_MARKERS = ['', '']; + +// GitHub presents the bot identity as "docker-agent" when posting with a machine +// user token, or "docker-agent[bot]" through a GitHub App installation token. +// Match both so the count is correct regardless of which token posted. +function matchesBotLogin(login: string | null | undefined, botLogin: string): boolean { + return login === botLogin || login === `${botLogin}[bot]`; +} + +export interface RateAnomalyOptions { + owner: string; + repo: string; + prNumber: number; + /** Sliding window in seconds. */ + windowSeconds: number; + /** Inclusive count at/above which the request is flagged anomalous. */ + threshold: number; + /** Login whose review/reply comments are counted. */ + botLogin: string; + /** Reference "now" in epoch ms (injectable for deterministic tests). */ + nowMs?: number; +} + +export interface RateAnomalyResult { + count: number; + anomalous: boolean; + windowSeconds: number; + threshold: number; +} + +interface CommentLike { + user?: { login?: string } | null; + body?: string | null; + created_at?: string; +} + +interface ReviewLike { + user?: { login?: string } | null; + body?: string | null; + submitted_at?: string | null; +} + +function isAgentReplyComment(c: CommentLike, botLogin: string, windowStartMs: number): boolean { + if (!matchesBotLogin(c.user?.login, botLogin)) return false; + const body = c.body ?? ''; + if (!REPLY_MARKERS.some((m) => body.includes(m))) return false; + if (!c.created_at) return false; + const created = Date.parse(c.created_at); + return Number.isFinite(created) && created >= windowStartMs; +} + +function isAgentReview(r: ReviewLike, botLogin: string, windowStartMs: number): boolean { + if (!matchesBotLogin(r.user?.login, botLogin)) return false; + // A real review run always carries an assessment/status body ("### Assessment: + // …", or a timeout/error/LGTM fallback). Standalone inline comments and replies + // surface in this endpoint as empty-body review entries; skipping them keeps + // each review run counted exactly once and avoids double-counting an inline + // reply (already counted via its reply marker on the comment endpoints). + if (!r.body || r.body.trim().length === 0) return false; + if (!r.submitted_at) return false; + const submitted = Date.parse(r.submitted_at); + return Number.isFinite(submitted) && submitted >= windowStartMs; +} + +/** + * Count docker-agent review/reply outputs on `prNumber` within the last + * `windowSeconds` — full reviews (via the Reviews API) plus conversational + * replies (issue comments and inline review-comment replies) — and decide + * whether the count constitutes a rate anomaly. + */ +export async function detectRateAnomaly( + token: string, + opts: RateAnomalyOptions, +): Promise { + if (!Number.isFinite(opts.windowSeconds) || opts.windowSeconds <= 0) { + throw new RangeError( + `windowSeconds must be a positive finite number, got ${opts.windowSeconds}`, + ); + } + if (!Number.isFinite(opts.threshold) || opts.threshold <= 0) { + throw new RangeError(`threshold must be a positive integer, got ${opts.threshold}`); + } + + const octokit = new Octokit({ auth: token }); + const now = opts.nowMs ?? Date.now(); + const windowStartMs = now - opts.windowSeconds * 1000; + const since = new Date(windowStartMs).toISOString(); + + // `since` filters the comment endpoints server-side by updated_at; the + // per-comment created_at check below enforces the precise window. All three + // fetches run in parallel to cut wall-clock time. + const [issueComments, reviewComments, reviews] = await Promise.all([ + octokit.paginate(octokit.rest.issues.listComments, { + owner: opts.owner, + repo: opts.repo, + issue_number: opts.prNumber, + since, + per_page: 100, + }) as Promise, + octokit.paginate(octokit.rest.pulls.listReviewComments, { + owner: opts.owner, + repo: opts.repo, + pull_number: opts.prNumber, + since, + per_page: 100, + }) as Promise, + // The Reviews API has no `since` parameter. Use paginate.iterator and stop + // early once the entire page predates the window (GitHub returns reviews + // oldest-first), avoiding unbounded API calls on heavily-reviewed PRs. + (async (): Promise => { + const acc: ReviewLike[] = []; + for await (const page of octokit.paginate.iterator(octokit.rest.pulls.listReviews, { + owner: opts.owner, + repo: opts.repo, + pull_number: opts.prNumber, + per_page: 100, + })) { + acc.push(...(page.data as ReviewLike[])); + const allBeforeWindow = page.data.every((r) => { + const submittedAt = (r as ReviewLike).submitted_at; + return !submittedAt || Date.parse(submittedAt) < windowStartMs; + }); + if (allBeforeWindow) break; + } + return acc; + })(), + ]); + + const replyCount = [...issueComments, ...reviewComments].filter((c) => + isAgentReplyComment(c, opts.botLogin, windowStartMs), + ).length; + const reviewCount = reviews.filter((r) => isAgentReview(r, opts.botLogin, windowStartMs)).length; + const count = replyCount + reviewCount; + + return { + count, + anomalous: count >= opts.threshold, + windowSeconds: opts.windowSeconds, + threshold: opts.threshold, + }; +} + +// --------------------------------------------------------------------------- +// CLI entry point +// --------------------------------------------------------------------------- + +function parsePositiveInt(value: string | undefined, fallback: number): number { + const n = Number.parseInt(value ?? '', 10); + return Number.isInteger(n) && n > 0 ? n : fallback; +} + +export async function main(): Promise { + const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? ''; + const repository = process.env.GITHUB_REPOSITORY ?? ''; + const prNumber = Number.parseInt(process.env.RATE_PR_NUMBER ?? '', 10); + const windowSeconds = parsePositiveInt(process.env.RATE_WINDOW_SECONDS, 600); + const threshold = parsePositiveInt(process.env.RATE_MAX_REQUESTS, 8); + const botLogin = process.env.RATE_BOT_LOGIN?.trim() || 'docker-agent'; + + // Fail open: a missing token or unparseable PR number must not block reviews. + if (!token || !Number.isInteger(prNumber) || prNumber <= 0) { + core.warning('rate-limit: missing token or PR number — skipping rate check (fail-open)'); + core.setOutput('anomalous', 'false'); + core.setOutput('count', '0'); + core.setOutput('window', String(windowSeconds)); + core.setOutput('threshold', String(threshold)); + return; + } + + const slashIdx = repository.indexOf('/'); + if (slashIdx < 0) { + core.warning(`rate-limit: invalid GITHUB_REPOSITORY '${repository}' — skipping (fail-open)`); + core.setOutput('anomalous', 'false'); + core.setOutput('count', '0'); + core.setOutput('window', String(windowSeconds)); + core.setOutput('threshold', String(threshold)); + return; + } + const owner = repository.slice(0, slashIdx); + const repo = repository.slice(slashIdx + 1); + + try { + const result = await detectRateAnomaly(token, { + owner, + repo, + prNumber, + windowSeconds, + threshold, + botLogin, + }); + core.setOutput('anomalous', String(result.anomalous)); + core.setOutput('count', String(result.count)); + core.setOutput('window', String(result.windowSeconds)); + core.setOutput('threshold', String(result.threshold)); + + if (result.anomalous) { + core.warning( + `Rate anomaly on PR #${prNumber}: ${result.count} agent reviews/replies in the ` + + `last ${windowSeconds}s (threshold ${threshold}). Throttling this request.`, + ); + } else { + core.info( + `✅ Rate OK on PR #${prNumber}: ${result.count}/${threshold} agent reviews/replies in ${windowSeconds}s`, + ); + } + } catch (err: unknown) { + // Fail open on API errors — never let the rate check itself break reviews. + core.warning( + `rate-limit check failed (fail-open): ${err instanceof Error ? err.message : String(err)}`, + ); + core.setOutput('anomalous', 'false'); + core.setOutput('count', '0'); + core.setOutput('window', String(windowSeconds)); + core.setOutput('threshold', String(threshold)); + } +} + +if (process.argv[1]?.endsWith('rate-limit.js') && !process.env.VITEST) { + main().catch((err: unknown) => { + core.warning(`rate-limit failed: ${err instanceof Error ? err.message : String(err)}`); + }); +} diff --git a/tsup.config.ts b/tsup.config.ts index 3fe6d7f..48dc2d3 100644 --- a/tsup.config.ts +++ b/tsup.config.ts @@ -31,6 +31,7 @@ const entry = { 'mention-reply': src('mention-reply'), 'migrate-consumer-refs': src('migrate-consumer-refs'), 'post-mention-reply': src('post-mention-reply'), + 'rate-limit': src('rate-limit'), 'score-confidence': src('score-confidence'), 'score-risk': src('score-risk'), security: src('security'),