From 5836af5d57409dc055c78c17aa92cc676c56fa2d Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Wed, 24 Jun 2026 13:28:54 +0200
Subject: [PATCH 1/6] feat(review-pr): add abuse safeguards for review requests

Harden the comment and event triggered PR review pipeline against abuse on
every trigger path (review_requested, /review, @docker-agent mention, automatic
opened/synchronize, and the workflow_run fork path).

- Org membership: verify the PR author on the direct pull_request auto-review
  path (previously an empty comment author was checked) via a new resolveUsername
  helper with a live PR-author lookup fallback; fail closed when no user resolves.
- Audit log: new src/audit-log module emits one structured record per request
  (requester, time, trigger, PR, reviewed SHA, allow/deny/throttle decision) as a
  notice, a job-summary line, and a 90 day JSONL artifact, on allow and deny
  paths, with an inline fallback when setup-credentials is unavailable.
- Rate anomaly: new src/rate-limit module skips the review when too many agent
  review/reply comments land on a PR within a window; per-PR concurrency groups
  collapse same-trigger bursts; the existing cache lock still prevents concurrent
  reviews.
- Force push: new src/check-staleness module compares the requested SHA against
  current head, records the actually reviewed SHA, and posts a notice on rebase.

Documented in SECURITY.md and review-pr/README.md. Unit tests added for all new
modules.
---
 .github/workflows/review-pr.yml               | 144 ++++++++++++-
 .github/workflows/self-review-pr-trigger.yml  |   9 +
 SECURITY.md                                   |  70 ++++++
 review-pr/README.md                           |  12 +-
 src/audit-log/__tests__/audit-log.test.ts     | 167 +++++++++++++++
 src/audit-log/index.ts                        | 194 +++++++++++++++++
 .../__tests__/check-org-membership.test.ts    |  68 +++++-
 src/check-org-membership/index.ts             | 106 +++++++---
 .../__tests__/check-staleness.test.ts         |  74 +++++++
 src/check-staleness/index.ts                  | 124 +++++++++++
 src/post-mention-reply/index.ts               |   5 +-
 src/rate-limit/__tests__/rate-limit.test.ts   | 130 ++++++++++++
 src/rate-limit/index.ts                       | 200 ++++++++++++++++++
 tsup.config.ts                                |   3 +
 14 files changed, 1272 insertions(+), 34 deletions(-)
 create mode 100644 src/audit-log/__tests__/audit-log.test.ts
 create mode 100644 src/audit-log/index.ts
 create mode 100644 src/check-staleness/__tests__/check-staleness.test.ts
 create mode 100644 src/check-staleness/index.ts
 create mode 100644 src/rate-limit/__tests__/rate-limit.test.ts
 create mode 100644 src/rate-limit/index.ts

diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml
index cfd2730..4fb3b37 100644
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -80,6 +80,20 @@ permissions:
   id-token: write
   actions: read   # download-artifact across workflow_run boundary
 
+# Rate-anomaly safeguard: serialize same-trigger bursts on a PR (e.g. rapid
+# force-pushes firing repeated auto-reviews) instead of running N in parallel.
+# The group is keyed per PR AND per trigger intent: the comment id (or event
+# name) suffix keeps distinct comments/replies in distinct groups, so a quick
+# conversational reply is never queued behind a 45-minute review. cancel-in-
+# progress is false so an in-flight review/reply is never killed mid-post.
+# Per-PR request *frequency* is enforced by the rate-limit check below, and the
+# in-action cache lock (review-pr/action.yml) prevents concurrent reviews; the
+# workflow_run/fork path (PR number only in the artifact) falls back to a per-run
+# group, where those two mechanisms still bound abuse.
+concurrency:
+  group: pr-review-${{ github.event.pull_request.number || github.event.issue.number || inputs.pr-number || github.run_id }}-${{ github.event.comment.id || github.event_name }}
+  cancel-in-progress: false
+
 jobs:
   resolve-context:
     if: inputs.trigger-run-id != ''
@@ -305,12 +319,55 @@ jobs:
           PR_SOURCE: ${{ steps.pr.outputs.source }}
           ORG: docker
           COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
+          # PR author for the direct pull_request auto-review path, where there is
+          # no comment author. Without this the membership check ran against an
+          # empty login and the PR author was never actually verified.
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
         run: node "$DOCKER_AGENT_ACTION_ROOT/dist/check-org-membership.js"
 
+      # Rate-anomaly safeguard: count how many docker-agent review/reply comments
+      # were posted on this PR in the recent window. An authorized account can
+      # still drive the bot at high frequency (each request costs an LLM run), so
+      # a burst above the threshold is flagged and the expensive review is skipped.
+      # Runs BEFORE "Create check run" so a throttled request creates no check run
+      # (a skipped review must not surface as a green "PR Review" check).
+      - name: Check rate anomaly
+        id: rate
+        if: |
+          steps.membership.outputs.is_member == 'true' &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true'
+        continue-on-error: true   # fail-open: a rate-check error must not block reviews
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          RATE_PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: node "$DOCKER_AGENT_ACTION_ROOT/dist/rate-limit.js"
+
+      # Force-push safeguard: compare the SHA the review was requested for against
+      # the current PR head. The review still runs against the latest commit
+      # (refs/pull/N/head); this records the SHA actually reviewed and posts a
+      # notice when the branch was force-pushed/rebased after the request.
+      - name: Check force-push staleness
+        id: staleness
+        if: |
+          steps.membership.outputs.is_member == 'true' &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true' &&
+          steps.rate.outputs.anomalous != 'true'
+        continue-on-error: true   # fail-open: a staleness-check error must not block reviews
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          STALE_PR_NUMBER: ${{ steps.pr.outputs.number }}
+          STALE_REQUESTED_SHA: ${{ needs.resolve-context.outputs.pr-head-sha || github.event.pull_request.head.sha }}
+        run: node "$DOCKER_AGENT_ACTION_ROOT/dist/check-staleness.js"
+
       - name: Create check run
         if: |
           (steps.pr.outputs.source == 'event' || steps.pr.outputs.source == 'trigger') &&
-          steps.membership.outputs.is_member == 'true'
+          steps.membership.outputs.is_member == 'true' &&
+          steps.rate.outputs.anomalous != 'true'
         id: create-check
         continue-on-error: true
         uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
@@ -341,7 +398,8 @@ jobs:
         if: |
           steps.membership.outputs.is_member == 'true' &&
           steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true'
+          steps.draft.outputs.skip != 'true' &&
+          steps.rate.outputs.anomalous != 'true'
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
           fetch-depth: 0
@@ -351,7 +409,8 @@ jobs:
         if: |
           steps.membership.outputs.is_member == 'true' &&
           steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true'
+          steps.draft.outputs.skip != 'true' &&
+          steps.rate.outputs.anomalous != 'true'
         id: run-review
         continue-on-error: true
         uses: docker/docker-agent-action/review-pr@e96a4bb40cac114f64358621e1d08346c8eadc8c # v2.0.1
@@ -396,6 +455,85 @@ jobs:
               core.warning(`Failed to update check run: ${error.message}`);
             }
 
+      # Audit safeguard: emit one structured record per review request that joins
+      # the requester, time, trigger, PR, reviewed SHA, and the authorize/deny/
+      # throttle decision. Runs on the allow AND deny paths so denied, throttled,
+      # and stale requests are all recorded, not just successful ones. If
+      # setup-credentials failed (no dist bundle / DOCKER_AGENT_ACTION_ROOT), the
+      # record is still emitted inline so even infra-failure denials are logged.
+      - name: Audit review request
+        if: |
+          always() &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true'
+        continue-on-error: true
+        shell: bash
+        env:
+          IS_MEMBER: ${{ steps.membership.outputs.is_member }}
+          RATE_ANOMALOUS: ${{ steps.rate.outputs.anomalous }}
+          STALE: ${{ steps.staleness.outputs.stale }}
+          CURRENT_SHA: ${{ steps.staleness.outputs.current-sha }}
+          REQUESTED_SHA: ${{ needs.resolve-context.outputs.pr-head-sha || github.event.pull_request.head.sha }}
+          USER_REQUESTED: ${{ steps.trigger-type.outputs.user_requested }}
+          AUDIT_ACTOR: ${{ github.event.comment.user.login || github.event.sender.login || github.actor }}
+          AUDIT_PR_NUMBER: ${{ steps.pr.outputs.number }}
+          AUDIT_EVENT: ${{ github.event_name }}
+          REVIEW_AUDIT_FILE: ${{ runner.temp }}/review-audit/review-audit.jsonl
+        run: |
+          if [ "$IS_MEMBER" != "true" ]; then
+            DECISION=denied
+            REASON="requester is not a docker org member"
+          elif [ "$RATE_ANOMALOUS" = "true" ]; then
+            DECISION=throttled
+            REASON="rate anomaly: too many recent review requests on this PR"
+          else
+            DECISION=authorized
+            if [ "$STALE" = "true" ]; then
+              REASON="docker org member (PR head moved after request — reviewing latest commit)"
+            else
+              REASON="docker org member"
+            fi
+          fi
+          export AUDIT_DECISION="$DECISION"
+          export AUDIT_REASON="$REASON"
+          export AUDIT_TRIGGER="$([ "$USER_REQUESTED" = "true" ] && echo user-requested || echo automatic)"
+          export AUDIT_HEAD_SHA="${CURRENT_SHA:-$REQUESTED_SHA}"
+          export AUDIT_REQUESTED_SHA="$REQUESTED_SHA"
+
+          # If setup-credentials failed (OIDC/Secrets Manager misconfig), the dist
+          # bundle and DOCKER_AGENT_ACTION_ROOT are never set. Still record the
+          # denial inline so the "log every request, including denials" guarantee
+          # holds even on infra-failure paths.
+          if [ -z "$DOCKER_AGENT_ACTION_ROOT" ] || [ ! -f "$DOCKER_AGENT_ACTION_ROOT/dist/audit-log.js" ]; then
+            TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+            REC=$(jq -nc \
+              --arg ts "$TS" --arg event "$AUDIT_EVENT" --arg trigger "$AUDIT_TRIGGER" \
+              --arg actor "${AUDIT_ACTOR:-unknown}" --arg repo "$GITHUB_REPOSITORY" \
+              --arg pr "$AUDIT_PR_NUMBER" --arg head "$AUDIT_HEAD_SHA" \
+              --arg requested "$AUDIT_REQUESTED_SHA" --arg decision "$AUDIT_DECISION" \
+              --arg reason "$AUDIT_REASON (audit-log bundle unavailable — emitted inline)" \
+              '{timestamp:$ts,event:$event,trigger:$trigger,actor:$actor,repository:$repo,prNumber:$pr,headSha:$head,requestedSha:$requested,decision:$decision,reason:$reason}')
+            echo "::notice title=Review request audit::[review-request-audit] $REC"
+            mkdir -p "$(dirname "$REVIEW_AUDIT_FILE")"
+            printf '%s\n' "$REC" >> "$REVIEW_AUDIT_FILE" || true
+            exit 0
+          fi
+
+          node "$DOCKER_AGENT_ACTION_ROOT/dist/audit-log.js" || echo "::warning::audit-log failed"
+
+      - name: Upload review audit log
+        if: |
+          always() &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true'
+        continue-on-error: true
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: pr-review-audit-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/review-audit/
+          retention-days: 90
+          if-no-files-found: ignore
+
   reply-to-feedback:
     needs: [resolve-context]
     if: |
diff --git a/.github/workflows/self-review-pr-trigger.yml b/.github/workflows/self-review-pr-trigger.yml
index ddd0c46..b2d411c 100644
--- a/.github/workflows/self-review-pr-trigger.yml
+++ b/.github/workflows/self-review-pr-trigger.yml
@@ -8,6 +8,15 @@ on:
   pull_request_review_comment:
     types: [ created ]
 
+# Rate-anomaly safeguard: collapse a burst of review_requested events on the same
+# PR at the source — re-requesting a review is redundant, so cancel-in-progress
+# drops superseded context-capture runs and only the latest fans out to a review.
+# Distinct review comments stay independent (the comment id keys them into their
+# own group) so a real reply is never dropped by another comment on the same PR.
+concurrency:
+  group: pr-review-trigger-${{ github.event.pull_request.number }}-${{ github.event.comment.id || 'review-request' }}
+  cancel-in-progress: true
+
 permissions: {}
 
 jobs:
diff --git a/SECURITY.md b/SECURITY.md
index 4f53f98..ea31eaf 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -26,6 +26,8 @@ This action includes **built-in security features for all agent executions**:
    - **Suspicious patterns** (strip + warn): Behavioral/natural-language injection attempts ("ignore previous instructions", "system mode", "reveal the token", base64/hex obfuscation, etc.) — matching lines are removed from the prompt before it reaches the agent
    - **Medium-risk patterns** (warn only): API key variable names in configuration (`ANTHROPIC_API_KEY`, `GITHUB_TOKEN`, etc.)
 
+4. **Review-Request Abuse Safeguards** — the comment/event-triggered PR review pipeline is hardened against abuse on every trigger path: org-membership validation, a structured audit log of every request (including denials), rate-anomaly throttling, and force-push staleness handling. See [Review-Request Abuse Safeguards](#review-request-abuse-safeguards).
+
 ## Security Architecture
 
 The action implements a defense-in-depth approach:
@@ -69,6 +71,74 @@ The action implements a defense-in-depth approach:
 └────────────────────────────────────────────────────────────────┘
 ```
 
+## Review-Request Abuse Safeguards
+
+The PR review pipeline (`.github/workflows/review-pr.yml` and the `review-pr/`
+composite actions) is comment- and event-triggered, which makes it the main
+abuse vector for cost/spam. Four safeguards harden the review-request flow on
+every trigger path (`review_requested`, the deprecated `/review` comment,
+`@docker-agent` mentions, automatic `opened`/`synchronize`, and the
+`workflow_run` fork path).
+
+### 1. Review requests come from org members
+
+Membership is verified before any review work runs, and which actor is checked
+depends on the trigger:
+
+| Trigger | Actor verified | Mechanism |
+|---------|----------------|-----------|
+| `/review` comment, `@docker-agent` mention, reply-to-feedback | The commenter | `check-org-membership` against the `docker` org (OIDC `org-membership-token`) |
+| Automatic review (`opened`/`synchronize`/`ready_for_review`) and `review_requested` | The PR author | `check-org-membership` (`PR_AUTHOR` env, with a live PR-author API lookup as fallback) |
+| `review_requested` via the PR sidebar | The requester | **GitHub-native enforcement** — only users with triage/write access can request a reviewer; the workflow additionally gates on `requested_reviewer.login == 'docker-agent'` |
+
+The membership check **fails closed**: if no actor login can be resolved, or the
+actor is not an org member, the review is skipped. `src/check-org-membership`
+resolves the actor via `resolveUsername` so the directly-wired `pull_request`
+auto-review path verifies the PR author instead of an empty comment author.
+
+### 2. All review requests are logged
+
+`src/audit-log` emits one structured audit record per review request — on the
+allow path **and** on every denial/throttle path — joining the fields needed to
+investigate abuse after the fact: requester login, ISO-8601 timestamp, trigger
+type, repository, PR number, reviewed head SHA (and requested SHA when they
+differ), and the `authorized | denied | throttled | skipped | stale` decision
+with a reason. Each record is emitted three ways so it survives even when raw
+run logs are gone:
+
+- a `core.notice` annotation prefixed `[review-request-audit]` (queryable via the
+  annotations API and greppable in logs),
+- a line appended to the job summary (`$GITHUB_STEP_SUMMARY`), and
+- an append-only JSONL line uploaded as the `pr-review-audit-*` artifact
+  (90-day retention).
+
+### 3. Rate anomalies are detected and throttled
+
+Authorization gates *who* may trigger a review, not *how often*. Three layers
+bound request frequency:
+
+- **Per-PR `concurrency:` groups** on `review-pr.yml` and the trigger workflow
+  collapse same-trigger bursts (e.g. rapid force-pushes, repeated review
+  requests) instead of running them in parallel. Groups are scoped per trigger
+  intent so a quick conversational reply is never queued behind a long review.
+- **`src/rate-limit`** counts docker-agent review/reply comments posted on the PR
+  within a sliding window (default 600 s) and, when the count crosses a threshold
+  (default 8), flags a rate anomaly. The review job skips the expensive review and
+  records a `throttled` audit entry. The check **fails open** so an API error
+  never blocks a legitimate review.
+- The existing in-action **cache lock** (`pr-review-lock-<repo>-<pr>-*`, 600 s
+  TTL) prevents concurrent reviews from racing on the same PR.
+
+### 4. Force-pushed / rebased commits are handled
+
+A review always checks out `refs/pull/N/head` (the current head), but the head
+SHA captured when the review was requested may differ after a force-push/rebase.
+`src/check-staleness` re-fetches the current head and compares it to the
+requested SHA. The review proceeds against the latest commit (the freshest code
+is what should be reviewed); the SHA actually reviewed is recorded in the audit
+log and a `core.notice` is posted when the branch moved, so a force-push during
+the request window is visible rather than silent.
+
 ## Security Modules
 
 All security logic lives under `src/security/` and is compiled into `dist/security.js` by
diff --git a/review-pr/README.md b/review-pr/README.md
index 097d327..48a86a8 100644
--- a/review-pr/README.md
+++ b/review-pr/README.md
@@ -175,9 +175,12 @@ with:
 
 > **Built-in defense-in-depth:**
 >
-> 1. **Verifies org membership** before every review (auto-review checks the PR author; `/review` checks the commenter)
+> 1. **Verifies org membership** before every review. Comment/mention/reply triggers check the commenter; auto-review and `review_requested` check the PR author. Requesting a review from `docker-agent` in the sidebar additionally relies on GitHub-native enforcement (only users with triage/write access can request reviewers).
 > 2. **Prevents bot cascades** — replies from bots (except `docker-agent`) are ignored
-> 3. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status
+> 3. **Logs every review request** — one structured audit record per request (requester, time, trigger, PR, reviewed SHA, allow/deny/throttle decision) is emitted as a notice, a job-summary line, and a 90-day audit artifact, including on denial paths
+> 4. **Throttles rate anomalies** — per-PR `concurrency:` groups collapse same-trigger bursts, and a rate-limit check skips the review when too many requests land on one PR in a short window
+> 5. **Handles force-pushes** — when a PR is rebased/force-pushed after a review is requested, the review runs against the latest commit and the actually-reviewed SHA is recorded with a notice
+> 6. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status
 
 ### What you don't need to add
 
@@ -186,10 +189,13 @@ The workflow YAML examples above are the complete, recommended setup. The reusab
 | Protection | How it's handled |
 | ---------- | ---------------- |
 | **Bot comment filtering** | All jobs in the reusable workflow filter out `docker-agent`, `docker-agent[bot]`, any `Bot`-type user, and comments with `<!-- docker-agent-review -->`/`<!-- docker-agent-review-reply -->` markers. No caller-side filtering needed. |
-| **Org membership / authorization** | A `check-org-membership` step runs before any review work. PR authors and comment authors are verified as org members or collaborators via OIDC. Callers never need `author_association` checks. |
+| **Org membership / authorization** | A `check-org-membership` step runs before any review work. On comment/mention/reply triggers it verifies the commenter; on auto-review and `review_requested` it verifies the PR author (falling back to a live PR-author lookup when no author login is in the event). Callers never need `author_association` checks. |
 | **PR vs issue comment** | The reusable workflow checks `github.event.issue.pull_request` internally. Plain issue comments on non-PR issues are silently ignored. |
 | **Draft PR skipping** | Draft PRs are skipped internally — no caller condition needed. |
 | **Concurrent review guard** | A cache-based lock (`pr-review-lock-<repo>-<pr>-*`) prevents duplicate reviews from racing on the same PR. |
+| **Rate-anomaly throttling** | Per-PR `concurrency:` groups serialize same-trigger bursts, and a rate-limit check skips the review when too many docker-agent review/reply comments land on one PR within the window. No caller configuration needed. |
+| **Review-request audit log** | Every request is recorded (notice + job summary + 90-day `pr-review-audit-*` artifact) with requester, trigger, PR, reviewed SHA, and the allow/deny/throttle decision — including denied and throttled requests. |
+| **Force-push handling** | The SHA a review was requested for is compared to the current head; the review runs against the latest commit and the reviewed SHA is recorded so a force-push mid-flight is visible rather than silent. |
 
 **The only decision callers make** is which setup pattern to use: 1-workflow for same-repo PRs, 2-workflow for repos that accept fork PRs. That distinction is the caller's responsibility because it controls which event path delivers OIDC credentials to the reusable workflow.
 
diff --git a/src/audit-log/__tests__/audit-log.test.ts b/src/audit-log/__tests__/audit-log.test.ts
new file mode 100644
index 0000000..230592d
--- /dev/null
+++ b/src/audit-log/__tests__/audit-log.test.ts
@@ -0,0 +1,167 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+const { mockNotice, mockWarning, mockAddRaw, mockAppendFileSync, mockMkdirSync } = vi.hoisted(
+  () => ({
+    mockNotice: vi.fn(),
+    mockWarning: vi.fn(),
+    mockAddRaw: vi.fn(),
+    mockAppendFileSync: vi.fn(),
+    mockMkdirSync: vi.fn(),
+  }),
+);
+
+vi.mock('@actions/core', () => ({
+  notice: mockNotice,
+  warning: mockWarning,
+  info: vi.fn(),
+  setOutput: vi.fn(),
+  summary: { addRaw: mockAddRaw, write: vi.fn().mockResolvedValue(undefined) },
+}));
+
+vi.mock('node:fs', () => ({
+  appendFileSync: mockAppendFileSync,
+  mkdirSync: mockMkdirSync,
+}));
+
+import {
+  buildAuditRecord,
+  emitAuditRecord,
+  formatNotice,
+  formatSummaryLine,
+  resolveAuditFilePath,
+} from '../index.js';
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+describe('buildAuditRecord', () => {
+  it('maps environment variables into a full record', () => {
+    const r = buildAuditRecord({
+      AUDIT_TIMESTAMP: '2026-06-24T10:00:00.000Z',
+      AUDIT_EVENT: 'pull_request',
+      AUDIT_TRIGGER: 'review_requested',
+      AUDIT_ACTOR: 'alice',
+      AUDIT_PR_NUMBER: '42',
+      AUDIT_HEAD_SHA: 'abc123def456',
+      AUDIT_DECISION: 'authorized',
+      AUDIT_REASON: 'org member',
+      GITHUB_REPOSITORY: 'docker/repo',
+      GITHUB_RUN_ID: '999',
+      GITHUB_SERVER_URL: 'https://github.com',
+    });
+
+    expect(r).toMatchObject({
+      timestamp: '2026-06-24T10:00:00.000Z',
+      event: 'pull_request',
+      trigger: 'review_requested',
+      actor: 'alice',
+      repository: 'docker/repo',
+      prNumber: '42',
+      headSha: 'abc123def456',
+      decision: 'authorized',
+      reason: 'org member',
+      runId: '999',
+      runUrl: 'https://github.com/docker/repo/actions/runs/999',
+    });
+  });
+
+  it('falls back to safe defaults for missing fields', () => {
+    const r = buildAuditRecord({});
+    expect(r.trigger).toBe('unknown');
+    expect(r.actor).toBe('unknown');
+    expect(r.prNumber).toBe('');
+    expect(r.runUrl).toBe('');
+    // timestamp defaults to a valid ISO string
+    expect(Number.isNaN(Date.parse(r.timestamp))).toBe(false);
+  });
+
+  it('coerces an invalid decision to "skipped"', () => {
+    expect(buildAuditRecord({ AUDIT_DECISION: 'bogus' }).decision).toBe('skipped');
+    expect(buildAuditRecord({ AUDIT_DECISION: 'denied' }).decision).toBe('denied');
+    expect(buildAuditRecord({ AUDIT_DECISION: 'throttled' }).decision).toBe('throttled');
+  });
+
+  it('falls back to GITHUB_EVENT_NAME when AUDIT_EVENT is unset', () => {
+    expect(buildAuditRecord({ GITHUB_EVENT_NAME: 'issue_comment' }).event).toBe('issue_comment');
+  });
+
+  it('omits runUrl when repository or run id is missing', () => {
+    expect(buildAuditRecord({ GITHUB_REPOSITORY: 'docker/repo' }).runUrl).toBe('');
+    expect(buildAuditRecord({ GITHUB_RUN_ID: '1' }).runUrl).toBe('');
+  });
+});
+
+describe('resolveAuditFilePath', () => {
+  it('prefers REVIEW_AUDIT_FILE', () => {
+    expect(resolveAuditFilePath({ REVIEW_AUDIT_FILE: '/custom/audit.jsonl' })).toBe(
+      '/custom/audit.jsonl',
+    );
+  });
+  it('falls back to RUNNER_TEMP', () => {
+    expect(resolveAuditFilePath({ RUNNER_TEMP: '/runner/tmp' })).toBe(
+      '/runner/tmp/review-audit.jsonl',
+    );
+  });
+  it('falls back to /tmp', () => {
+    expect(resolveAuditFilePath({})).toBe('/tmp/review-audit.jsonl');
+  });
+});
+
+describe('formatters', () => {
+  const record = buildAuditRecord({
+    AUDIT_TIMESTAMP: '2026-06-24T10:00:00.000Z',
+    AUDIT_TRIGGER: 'mention',
+    AUDIT_ACTOR: 'bob',
+    AUDIT_PR_NUMBER: '7',
+    AUDIT_HEAD_SHA: 'deadbeefcafe',
+    AUDIT_DECISION: 'denied',
+    AUDIT_REASON: 'not an org member',
+  });
+
+  it('formatNotice prefixes with a greppable tag and embeds JSON', () => {
+    const msg = formatNotice(record);
+    expect(msg.startsWith('[review-request-audit] ')).toBe(true);
+    const parsed = JSON.parse(msg.slice('[review-request-audit] '.length));
+    expect(parsed.actor).toBe('bob');
+    expect(parsed.decision).toBe('denied');
+  });
+
+  it('formatSummaryLine renders a concise markdown bullet with a short SHA', () => {
+    const line = formatSummaryLine(record);
+    expect(line).toContain('**denied**');
+    expect(line).toContain('@bob');
+    expect(line).toContain('PR #7');
+    expect(line).toContain('`deadbeef`'); // short SHA
+    expect(line).toContain('not an org member');
+  });
+});
+
+describe('emitAuditRecord', () => {
+  it('emits a notice, a summary line, and appends JSONL to the audit file', () => {
+    const record = buildAuditRecord({ AUDIT_ACTOR: 'carol', AUDIT_DECISION: 'authorized' });
+    emitAuditRecord(record, { auditFilePath: '/tmp/x/review-audit.jsonl' });
+
+    expect(mockNotice).toHaveBeenCalledTimes(1);
+    expect(mockAddRaw).toHaveBeenCalledTimes(1);
+    expect(mockMkdirSync).toHaveBeenCalledWith('/tmp/x', { recursive: true });
+    expect(mockAppendFileSync).toHaveBeenCalledTimes(1);
+    const [path, line] = mockAppendFileSync.mock.calls[0];
+    expect(path).toBe('/tmp/x/review-audit.jsonl');
+    expect(JSON.parse((line as string).trim()).actor).toBe('carol');
+  });
+
+  it('downgrades a file-write failure to a warning (never throws)', () => {
+    mockAppendFileSync.mockImplementationOnce(() => {
+      throw new Error('disk full');
+    });
+    const record = buildAuditRecord({ AUDIT_ACTOR: 'dave' });
+    expect(() => emitAuditRecord(record, { auditFilePath: '/tmp/y.jsonl' })).not.toThrow();
+    expect(mockWarning).toHaveBeenCalledTimes(1);
+    // The notice still fired despite the persistence failure.
+    expect(mockNotice).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/src/audit-log/index.ts b/src/audit-log/index.ts
new file mode 100644
index 0000000..fc3632d
--- /dev/null
+++ b/src/audit-log/index.ts
@@ -0,0 +1,194 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * audit-log — emit a structured audit record for every review request.
+ *
+ * This is an abuse safeguard: GitHub Actions run logs are not a reliable audit
+ * store (short retention, no query/export, deletable by repo admins), so each
+ * review request — on every trigger path and crucially on every *denial* path —
+ * is recorded as a single correlated record that joins the fields needed to
+ * investigate abuse after the fact: who triggered it, when, via which trigger,
+ * on which PR/SHA, and what the authorization decision was.
+ *
+ * Each invocation emits the record three ways:
+ *   1. core.notice  — a queryable workflow annotation (survives in the run UI
+ *                     and via the GitHub API even when raw logs are gone)
+ *   2. job summary  — a human-readable line appended to $GITHUB_STEP_SUMMARY
+ *   3. JSONL file   — an append-only line the workflow uploads as a
+ *                     long-retention artifact for durable, machine-readable audit
+ *
+ * CLI (invoked as a shell run step via dist/audit-log.js):
+ *   All inputs are read from environment variables:
+ *     AUDIT_TRIGGER       Trigger type (e.g. "review_requested", "mention", "automatic")
+ *     AUDIT_ACTOR         Login of the user who triggered the request
+ *     AUDIT_DECISION      "authorized" | "denied" | "skipped" | "throttled" | "stale"
+ *     AUDIT_REASON        Free-text reason for the decision (optional)
+ *     AUDIT_PR_NUMBER     PR number (optional)
+ *     AUDIT_HEAD_SHA      Reviewed head SHA (optional)
+ *     AUDIT_REQUESTED_SHA SHA the review was requested for, if different (optional)
+ *     AUDIT_EVENT         GitHub event name (optional; defaults to GITHUB_EVENT_NAME)
+ *     AUDIT_TIMESTAMP     ISO-8601 timestamp (optional; defaults to now)
+ *     REVIEW_AUDIT_FILE   Path for the JSONL audit file (optional; defaults to
+ *                         $RUNNER_TEMP/review-audit.jsonl, else /tmp/review-audit.jsonl)
+ *     GITHUB_REPOSITORY   "owner/repo" (standard GitHub Actions env var)
+ *     GITHUB_RUN_ID       Run id (standard GitHub Actions env var)
+ *     GITHUB_SERVER_URL   Server url (standard GitHub Actions env var)
+ *
+ * Guard: the CLI entry point only executes when process.argv[1] ends with
+ * "audit-log.js" and VITEST is not set, so importing this module as a library
+ * never triggers a write.
+ */
+import { appendFileSync, mkdirSync } from 'node:fs';
+import { dirname } from 'node:path';
+import * as core from '@actions/core';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/** Authorization / handling outcome recorded for a review request. */
+export type AuditDecision = 'authorized' | 'denied' | 'skipped' | 'throttled' | 'stale';
+
+const VALID_DECISIONS: readonly AuditDecision[] = [
+  'authorized',
+  'denied',
+  'skipped',
+  'throttled',
+  'stale',
+];
+
+export interface ReviewAuditRecord {
+  /** ISO-8601 timestamp of when the request was processed. */
+  timestamp: string;
+  /** GitHub event name (e.g. "pull_request", "issue_comment"). */
+  event: string;
+  /** Logical trigger type (e.g. "review_requested", "mention", "automatic"). */
+  trigger: string;
+  /** Login of the user who triggered the request ("unknown" when unresolved). */
+  actor: string;
+  /** "owner/repo". */
+  repository: string;
+  /** PR number, or empty string when not applicable. */
+  prNumber: string;
+  /** Reviewed head SHA, or empty string. */
+  headSha: string;
+  /** SHA the review was requested for, when it differs from headSha. */
+  requestedSha: string;
+  /** Authorization / handling outcome. */
+  decision: AuditDecision;
+  /** Human-readable reason for the decision. */
+  reason: string;
+  /** Workflow run id. */
+  runId: string;
+  /** Direct URL to the workflow run, when derivable. */
+  runUrl: string;
+}
+
+// ---------------------------------------------------------------------------
+// Pure builder
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a {@link ReviewAuditRecord} from environment variables. Pure: performs
+ * no I/O. Unknown/empty fields fall back to safe defaults so a partially-wired
+ * caller still produces a usable record rather than throwing.
+ */
+export function buildAuditRecord(env: NodeJS.ProcessEnv): ReviewAuditRecord {
+  const repository = env.GITHUB_REPOSITORY ?? '';
+  const runId = env.GITHUB_RUN_ID ?? '';
+  const serverUrl = env.GITHUB_SERVER_URL ?? 'https://github.com';
+
+  const decisionRaw = (env.AUDIT_DECISION ?? '').trim() as AuditDecision;
+  const decision: AuditDecision = VALID_DECISIONS.includes(decisionRaw) ? decisionRaw : 'skipped';
+
+  return {
+    timestamp: env.AUDIT_TIMESTAMP?.trim() || new Date().toISOString(),
+    event: (env.AUDIT_EVENT ?? env.GITHUB_EVENT_NAME ?? '').trim(),
+    trigger: (env.AUDIT_TRIGGER ?? '').trim() || 'unknown',
+    actor: (env.AUDIT_ACTOR ?? '').trim() || 'unknown',
+    repository,
+    prNumber: (env.AUDIT_PR_NUMBER ?? '').trim(),
+    headSha: (env.AUDIT_HEAD_SHA ?? '').trim(),
+    requestedSha: (env.AUDIT_REQUESTED_SHA ?? '').trim(),
+    decision,
+    reason: (env.AUDIT_REASON ?? '').trim(),
+    runId,
+    runUrl: repository && runId ? `${serverUrl}/${repository}/actions/runs/${runId}` : '',
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Formatters (pure)
+// ---------------------------------------------------------------------------
+
+/** Single-line annotation message. The `[review-request-audit]` prefix makes
+ *  records greppable in logs and filterable via the annotations API. */
+export function formatNotice(record: ReviewAuditRecord): string {
+  return `[review-request-audit] ${JSON.stringify(record)}`;
+}
+
+/** One markdown bullet for the job summary. SHAs are short-formed for brevity. */
+export function formatSummaryLine(record: ReviewAuditRecord): string {
+  const sha = record.headSha ? ` \`${record.headSha.slice(0, 8)}\`` : '';
+  const pr = record.prNumber ? ` PR #${record.prNumber}` : '';
+  const reason = record.reason ? ` — ${record.reason}` : '';
+  return `- \`${record.timestamp}\` **${record.decision}** ${record.trigger} by @${record.actor}${pr}${sha}${reason}`;
+}
+
+/** Resolve the JSONL audit file path, honoring REVIEW_AUDIT_FILE then RUNNER_TEMP. */
+export function resolveAuditFilePath(env: NodeJS.ProcessEnv): string {
+  if (env.REVIEW_AUDIT_FILE?.trim()) return env.REVIEW_AUDIT_FILE.trim();
+  const base = env.RUNNER_TEMP?.trim() || '/tmp';
+  return `${base}/review-audit.jsonl`;
+}
+
+// ---------------------------------------------------------------------------
+// Emitter (side effects)
+// ---------------------------------------------------------------------------
+
+export interface EmitOptions {
+  /** Path to append the JSONL record to. */
+  auditFilePath: string;
+}
+
+/**
+ * Emit the record as a notice + job-summary line + JSONL append. Persisting the
+ * record is best-effort: a failed file append must never block a review, so it
+ * downgrades to a warning rather than throwing.
+ */
+export function emitAuditRecord(record: ReviewAuditRecord, opts: EmitOptions): void {
+  core.notice(formatNotice(record), { title: 'Review request audit' });
+  core.summary.addRaw(`${formatSummaryLine(record)}\n`, false);
+
+  try {
+    mkdirSync(dirname(opts.auditFilePath), { recursive: true });
+    appendFileSync(opts.auditFilePath, `${JSON.stringify(record)}\n`);
+  } catch (err: unknown) {
+    core.warning(
+      `Failed to persist audit record to ${opts.auditFilePath}: ${
+        err instanceof Error ? err.message : String(err)
+      }`,
+    );
+  }
+}
+
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+
+async function main(): Promise<void> {
+  const record = buildAuditRecord(process.env);
+  emitAuditRecord(record, { auditFilePath: resolveAuditFilePath(process.env) });
+  // Surface the chosen path so the workflow can upload it as an artifact.
+  core.setOutput('audit-file', resolveAuditFilePath(process.env));
+  await core.summary.write();
+}
+
+// Guard: only run as CLI when invoked directly as dist/audit-log.js.
+if (process.argv[1]?.endsWith('audit-log.js') && !process.env.VITEST) {
+  main().catch((err: unknown) => {
+    // Audit logging must never fail a review; warn and exit 0.
+    core.warning(`audit-log failed: ${err instanceof Error ? err.message : String(err)}`);
+  });
+}
diff --git a/src/check-org-membership/__tests__/check-org-membership.test.ts b/src/check-org-membership/__tests__/check-org-membership.test.ts
index 41c9031..4ec0f91 100644
--- a/src/check-org-membership/__tests__/check-org-membership.test.ts
+++ b/src/check-org-membership/__tests__/check-org-membership.test.ts
@@ -30,7 +30,7 @@ const { mockCheckMembershipForUser, mockGetPull, MockOctokit, constructorTokens
 
 vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
 
-import { checkOrgMembership, resolvePrAuthor } from '../index.js';
+import { checkOrgMembership, resolvePrAuthor, resolveUsername } from '../index.js';
 
 const ORG_TOKEN = 'fake-org-token';
 const REPO_TOKEN = 'fake-repo-token';
@@ -135,6 +135,72 @@ describe('resolvePrAuthor', () => {
   });
 });
 
+// ---------------------------------------------------------------------------
+// resolveUsername — picks which user's membership to verify per trigger path
+// ---------------------------------------------------------------------------
+
+describe('resolveUsername', () => {
+  const apiOpts = { repoToken: REPO_TOKEN, owner: 'docker', repo: 'repo', prNumber: 11 };
+
+  // This suite asserts on getPull call counts; reset history per test since the
+  // shared mock accumulates calls across the other suites in this file.
+  beforeEach(() => {
+    mockGetPull.mockClear();
+  });
+
+  it('uses the comment author on an event trigger when present', async () => {
+    const u = await resolveUsername({
+      prSource: 'event',
+      commentAuthor: 'alice',
+      prAuthor: '',
+      ...apiOpts,
+    });
+    expect(u).toBe('alice');
+    expect(mockGetPull).not.toHaveBeenCalled();
+  });
+
+  it('falls back to the PR author on an event trigger with no comment author (direct pull_request path)', async () => {
+    const u = await resolveUsername({
+      prSource: 'event',
+      commentAuthor: '',
+      prAuthor: 'frank',
+      ...apiOpts,
+    });
+    expect(u).toBe('frank');
+    expect(mockGetPull).not.toHaveBeenCalled();
+  });
+
+  it('resolves the PR author via the API when both author envs are empty on an event trigger', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { user: { login: 'grace' } } });
+    const u = await resolveUsername({
+      prSource: 'event',
+      commentAuthor: '',
+      prAuthor: '',
+      ...apiOpts,
+    });
+    expect(u).toBe('grace');
+    expect(mockGetPull).toHaveBeenCalledWith({ owner: 'docker', repo: 'repo', pull_number: 11 });
+  });
+
+  it('resolves the PR author via the API on the trigger path', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { user: { login: 'heidi' } } });
+    const u = await resolveUsername({
+      prSource: 'trigger',
+      commentAuthor: '',
+      prAuthor: '',
+      ...apiOpts,
+    });
+    expect(u).toBe('heidi');
+    expect(mockGetPull).toHaveBeenCalledTimes(1);
+  });
+
+  it('uses the repo token (not the org token) for the API fallback', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { user: { login: 'ivan' } } });
+    await resolveUsername({ prSource: 'input', commentAuthor: '', prAuthor: '', ...apiOpts });
+    expect(constructorTokens).toEqual([REPO_TOKEN]);
+  });
+});
+
 // ---------------------------------------------------------------------------
 // Token isolation: checkOrgMembership uses ORG_TOKEN, resolvePrAuthor uses REPO_TOKEN
 // ---------------------------------------------------------------------------
diff --git a/src/check-org-membership/index.ts b/src/check-org-membership/index.ts
index 4c275d0..bd43e09 100644
--- a/src/check-org-membership/index.ts
+++ b/src/check-org-membership/index.ts
@@ -15,8 +15,11 @@
  *     GITHUB_REPOSITORY      "owner/repo" (standard GitHub Actions env var)
  *     ORG                    GitHub org name to check (e.g. "docker")
  *     PR_SOURCE              "event" | "trigger" | "input"
- *     PR_NUMBER              PR number as string (required when PR_SOURCE != 'event')
- *     COMMENT_AUTHOR         User login (required when PR_SOURCE == 'event')
+ *     PR_NUMBER              PR number as string (required when no author env is set)
+ *     COMMENT_AUTHOR         Comment author login (set on comment-triggered events)
+ *     PR_AUTHOR              PR author login (set on pull_request events so the direct
+ *                            auto-review path verifies the PR author, not an empty
+ *                            comment author; falls back to a live API lookup if unset)
  *
  *   Outputs are written via @actions/core.setOutput (writes to $GITHUB_OUTPUT):
  *     is_member              "true" | "false"
@@ -83,6 +86,44 @@ export async function resolvePrAuthor(
   return pr.user?.login ?? '';
 }
 
+// ---------------------------------------------------------------------------
+// Core function: resolve which user's membership to verify
+// ---------------------------------------------------------------------------
+
+export interface ResolveUsernameOptions {
+  /** "event" | "trigger" | "input" — how the PR number was resolved. */
+  prSource: string;
+  /** Comment author login (set on comment-triggered events). */
+  commentAuthor: string;
+  /** PR author login (set on pull_request events). */
+  prAuthor: string;
+  /** Repo-scoped token used for the live PR-author lookup fallback. */
+  repoToken: string;
+  owner: string;
+  repo: string;
+  prNumber: number;
+}
+
+/**
+ * Resolve the login whose org membership must be verified for this request.
+ *
+ * Precedence:
+ *   1. On an "event" trigger, the comment author (comment-driven events) or the
+ *      PR author (pull_request events expose PR_AUTHOR, not a comment author).
+ *   2. Otherwise — and as a fallback when both author envs are empty — resolve
+ *      the PR author live via the API.
+ *
+ * The fallback closes the historical gap where a directly-wired pull_request
+ * auto-review passed an empty COMMENT_AUTHOR, so the PR author was never checked.
+ */
+export async function resolveUsername(opts: ResolveUsernameOptions): Promise<string> {
+  if (opts.prSource === 'event') {
+    const fromEvent = opts.commentAuthor || opts.prAuthor;
+    if (fromEvent) return fromEvent;
+  }
+  return resolvePrAuthor(opts.repoToken, opts.owner, opts.repo, opts.prNumber);
+}
+
 // ---------------------------------------------------------------------------
 // CLI entry point
 // ---------------------------------------------------------------------------
@@ -94,6 +135,7 @@ async function main(): Promise<void> {
   const prSource = process.env.PR_SOURCE ?? '';
   const prNumberStr = process.env.PR_NUMBER ?? '';
   const commentAuthor = process.env.COMMENT_AUTHOR ?? '';
+  const prAuthor = process.env.PR_AUTHOR ?? '';
   const repository = process.env.GITHUB_REPOSITORY ?? '';
 
   if (!orgToken) {
@@ -105,31 +147,45 @@ async function main(): Promise<void> {
     return;
   }
 
+  // A PR number is always required: even on the "event" path the author env may
+  // be empty (directly-wired pull_request auto-review), which falls back to a
+  // live PR-author lookup.
+  const prNumber = parseInt(prNumberStr, 10);
+  if (!Number.isInteger(prNumber) || prNumber <= 0) {
+    core.setFailed(`Invalid pr-number: '${prNumberStr}' (expected positive integer)`);
+    return;
+  }
+  const slashIdx = repository.indexOf('/');
+  if (slashIdx < 0) {
+    core.setFailed(`Invalid GITHUB_REPOSITORY: '${repository}' (expected 'owner/repo')`);
+    return;
+  }
+  const owner = repository.slice(0, slashIdx);
+  const repo = repository.slice(slashIdx + 1);
+
   let username: string;
+  try {
+    username = await resolveUsername({
+      prSource,
+      commentAuthor,
+      prAuthor,
+      repoToken,
+      owner,
+      repo,
+      prNumber,
+    });
+  } catch (err: unknown) {
+    core.setFailed(
+      `Failed to resolve user for membership check on #${prNumber}: ${err instanceof Error ? err.message : String(err)}`,
+    );
+    return;
+  }
 
-  if (prSource === 'event') {
-    username = commentAuthor;
-  } else {
-    const prNumber = parseInt(prNumberStr, 10);
-    if (!Number.isInteger(prNumber) || prNumber <= 0) {
-      core.setFailed(`Invalid pr-number: '${prNumberStr}' (expected positive integer)`);
-      return;
-    }
-    const slashIdx = repository.indexOf('/');
-    if (slashIdx < 0) {
-      core.setFailed(`Invalid GITHUB_REPOSITORY: '${repository}' (expected 'owner/repo')`);
-      return;
-    }
-    const owner = repository.slice(0, slashIdx);
-    const repo = repository.slice(slashIdx + 1);
-    try {
-      username = await resolvePrAuthor(repoToken, owner, repo, prNumber);
-    } catch (err: unknown) {
-      core.setFailed(
-        `Failed to resolve PR author for #${prNumber}: ${err instanceof Error ? err.message : String(err)}`,
-      );
-      return;
-    }
+  if (!username) {
+    // No resolvable user — fail closed: do not authorize a review we can't attribute.
+    core.setOutput('is_member', 'false');
+    core.info('⏭️ Could not resolve a user to verify org membership — skipping review');
+    return;
   }
 
   try {
diff --git a/src/check-staleness/__tests__/check-staleness.test.ts b/src/check-staleness/__tests__/check-staleness.test.ts
new file mode 100644
index 0000000..27bd778
--- /dev/null
+++ b/src/check-staleness/__tests__/check-staleness.test.ts
@@ -0,0 +1,74 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+vi.mock('@actions/core');
+
+const { mockGetPull, MockOctokit, constructorTokens } = vi.hoisted(() => {
+  const mockGetPull = vi.fn();
+  const constructorTokens: string[] = [];
+  class MockOctokit {
+    constructor({ auth }: { auth: string }) {
+      constructorTokens.push(auth);
+    }
+    rest = { pulls: { get: mockGetPull } };
+  }
+  return { mockGetPull, MockOctokit, constructorTokens };
+});
+
+vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
+
+import { checkStaleness } from '../index.js';
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  constructorTokens.length = 0;
+});
+
+const opts = { owner: 'docker', repo: 'repo', prNumber: 9 };
+
+describe('checkStaleness', () => {
+  it('flags stale when the requested SHA differs from current head', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'newsha111' } } });
+
+    const r = await checkStaleness('tok', { ...opts, requestedSha: 'oldsha000' });
+
+    expect(r).toEqual({ requestedSha: 'oldsha000', currentSha: 'newsha111', stale: true });
+    expect(mockGetPull).toHaveBeenCalledWith({ owner: 'docker', repo: 'repo', pull_number: 9 });
+    expect(constructorTokens).toEqual(['tok']);
+  });
+
+  it('is not stale when the requested SHA matches current head', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'samesha' } } });
+    const r = await checkStaleness('tok', { ...opts, requestedSha: 'samesha' });
+    expect(r.stale).toBe(false);
+  });
+
+  it('is not stale (fail-open) when the requested SHA is empty', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'newsha' } } });
+    const r = await checkStaleness('tok', { ...opts, requestedSha: '' });
+    expect(r).toEqual({ requestedSha: '', currentSha: 'newsha', stale: false });
+  });
+
+  it('trims surrounding whitespace on the requested SHA before comparing', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'abc' } } });
+    const r = await checkStaleness('tok', { ...opts, requestedSha: '  abc \n' });
+    expect(r.stale).toBe(false);
+    expect(r.requestedSha).toBe('abc');
+  });
+
+  it('is not stale when current head is unknown (missing head.sha)', async () => {
+    mockGetPull.mockResolvedValueOnce({ data: { head: {} } });
+    const r = await checkStaleness('tok', { ...opts, requestedSha: 'oldsha' });
+    expect(r.currentSha).toBe('');
+    expect(r.stale).toBe(false);
+  });
+
+  it('propagates API errors to the caller', async () => {
+    mockGetPull.mockRejectedValueOnce(Object.assign(new Error('Not Found'), { status: 404 }));
+    await expect(checkStaleness('tok', { ...opts, requestedSha: 'x' })).rejects.toThrow(
+      'Not Found',
+    );
+  });
+});
diff --git a/src/check-staleness/index.ts b/src/check-staleness/index.ts
new file mode 100644
index 0000000..b48cbb2
--- /dev/null
+++ b/src/check-staleness/index.ts
@@ -0,0 +1,124 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * check-staleness — detect when a PR was force-pushed / rebased between the
+ * moment a review was requested and the moment it actually runs.
+ *
+ * Review requests capture the head SHA at trigger time (e.g. pr_head_sha.txt in
+ * the trigger artifact, or github.event.pull_request.head.sha on the direct
+ * path), but every review checks out refs/pull/N/head, which always resolves to
+ * the *current* head. If the branch was force-pushed in between, the review
+ * silently runs against a different commit than the one requested, and the
+ * posted review / check run end up pinned to a SHA nobody asked to review.
+ *
+ * This module re-fetches the current head SHA and compares it to the requested
+ * SHA so the workflow can record the SHA actually reviewed and post a notice
+ * when the two diverge. The review proceeds against current head (the freshest
+ * commit is what should be reviewed) — the safeguard is detection + an honest
+ * record, not blocking.
+ *
+ * Exported:
+ *   checkStaleness(token, opts) → StalenessResult
+ *
+ * CLI (invoked via dist/check-staleness.js): inputs from environment variables:
+ *   GITHUB_TOKEN / GH_TOKEN   Token with pull-request read scope
+ *   GITHUB_REPOSITORY         "owner/repo"
+ *   STALE_PR_NUMBER           PR number
+ *   STALE_REQUESTED_SHA       Head SHA captured when the review was requested
+ * Outputs (via @actions/core.setOutput):
+ *   stale ("true"|"false"), current-sha, requested-sha
+ */
+import * as core from '@actions/core';
+import { Octokit } from '@octokit/rest';
+
+export interface StalenessOptions {
+  owner: string;
+  repo: string;
+  prNumber: number;
+  /** Head SHA captured when the review was requested ("" when unknown). */
+  requestedSha: string;
+}
+
+export interface StalenessResult {
+  requestedSha: string;
+  currentSha: string;
+  /** True only when both SHAs are known and differ. */
+  stale: boolean;
+}
+
+/**
+ * Fetch the current PR head SHA and compare it to `requestedSha`. When the
+ * requested SHA is empty/unknown, staleness cannot be determined and `stale` is
+ * false (fail-open: never block on missing data).
+ */
+export async function checkStaleness(
+  token: string,
+  opts: StalenessOptions,
+): Promise<StalenessResult> {
+  const octokit = new Octokit({ auth: token });
+  const { data: pr } = await octokit.rest.pulls.get({
+    owner: opts.owner,
+    repo: opts.repo,
+    pull_number: opts.prNumber,
+  });
+  const currentSha = pr.head?.sha ?? '';
+  const requestedSha = opts.requestedSha.trim();
+  const stale = requestedSha !== '' && currentSha !== '' && requestedSha !== currentSha;
+  return { requestedSha, currentSha, stale };
+}
+
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+
+async function main(): Promise<void> {
+  const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? '';
+  const repository = process.env.GITHUB_REPOSITORY ?? '';
+  const prNumber = Number.parseInt(process.env.STALE_PR_NUMBER ?? '', 10);
+  const requestedSha = process.env.STALE_REQUESTED_SHA ?? '';
+
+  if (!token || !Number.isInteger(prNumber) || prNumber <= 0) {
+    core.warning('check-staleness: missing token or PR number — skipping (fail-open)');
+    core.setOutput('stale', 'false');
+    return;
+  }
+
+  const slashIdx = repository.indexOf('/');
+  if (slashIdx < 0) {
+    core.warning(`check-staleness: invalid GITHUB_REPOSITORY '${repository}' — skipping`);
+    core.setOutput('stale', 'false');
+    return;
+  }
+  const owner = repository.slice(0, slashIdx);
+  const repo = repository.slice(slashIdx + 1);
+
+  try {
+    const result = await checkStaleness(token, { owner, repo, prNumber, requestedSha });
+    core.setOutput('stale', String(result.stale));
+    core.setOutput('current-sha', result.currentSha);
+    core.setOutput('requested-sha', result.requestedSha);
+
+    if (result.stale) {
+      core.notice(
+        `PR #${prNumber} was updated after the review was requested: requested ` +
+          `${result.requestedSha.slice(0, 8)}, now reviewing ${result.currentSha.slice(0, 8)}. ` +
+          `The review will run against the latest commit.`,
+        { title: 'Force-push detected — reviewing latest commit' },
+      );
+    } else {
+      core.info(`✅ PR #${prNumber} head is current (${result.currentSha.slice(0, 8)})`);
+    }
+  } catch (err: unknown) {
+    core.warning(
+      `check-staleness failed (fail-open): ${err instanceof Error ? err.message : String(err)}`,
+    );
+    core.setOutput('stale', 'false');
+  }
+}
+
+if (process.argv[1]?.endsWith('check-staleness.js') && !process.env.VITEST) {
+  main().catch((err: unknown) => {
+    core.warning(`check-staleness failed: ${err instanceof Error ? err.message : String(err)}`);
+  });
+}
diff --git a/src/post-mention-reply/index.ts b/src/post-mention-reply/index.ts
index abcc126..620afd9 100644
--- a/src/post-mention-reply/index.ts
+++ b/src/post-mention-reply/index.ts
@@ -101,8 +101,9 @@ export async function run(config: PostMentionReplyConfig): Promise<void> {
   const prNum = parseInt(prNumber, 10);
 
   // Guard 7: dedup check (inline only — top-level has no dedup, see C1).
-  // Workflow-level concurrency lock and should-reply guards prevent double-posting
-  // for top-level mentions; only inline threads need per-thread deduplication.
+  // Top-level mentions rely on the per-PR `concurrency:` group in review-pr.yml
+  // (which serializes runs for one PR) plus the should-reply guards to bound
+  // double-posting; only inline threads need this per-thread deduplication.
   if (isInline) {
     let isDuplicate = false;
     try {
diff --git a/src/rate-limit/__tests__/rate-limit.test.ts b/src/rate-limit/__tests__/rate-limit.test.ts
new file mode 100644
index 0000000..0bd9787
--- /dev/null
+++ b/src/rate-limit/__tests__/rate-limit.test.ts
@@ -0,0 +1,130 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+vi.mock('@actions/core');
+
+const { mockPaginate, mockListComments, mockListReviewComments, MockOctokit } = vi.hoisted(() => {
+  const mockListComments = { endpoint: 'issues.listComments' };
+  const mockListReviewComments = { endpoint: 'pulls.listReviewComments' };
+  const mockPaginate = vi.fn();
+
+  class MockOctokit {
+    paginate = mockPaginate;
+    rest = {
+      issues: { listComments: mockListComments },
+      pulls: { listReviewComments: mockListReviewComments },
+    };
+  }
+  return { mockPaginate, mockListComments, mockListReviewComments, MockOctokit };
+});
+
+vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
+
+import { detectRateAnomaly } from '../index.js';
+
+const NOW = Date.parse('2026-06-24T10:10:00.000Z');
+const within = (secAgo: number) => new Date(NOW - secAgo * 1000).toISOString();
+
+const BOT = 'docker-agent';
+const REVIEW_MARKER = '<!-- docker-agent-review -->';
+const REPLY_MARKER = '<!-- docker-agent-review-reply -->';
+
+function agentComment(secAgo: number, marker = REVIEW_MARKER) {
+  return { user: { login: BOT }, body: `Review body ${marker}`, created_at: within(secAgo) };
+}
+
+// Route paginate() to the right dataset based on which endpoint it was given.
+function routePaginate(issue: unknown[], review: unknown[]) {
+  mockPaginate.mockImplementation((endpoint: unknown) => {
+    if (endpoint === mockListReviewComments) return Promise.resolve(review);
+    return Promise.resolve(issue);
+  });
+}
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+describe('detectRateAnomaly', () => {
+  const base = {
+    owner: 'docker',
+    repo: 'repo',
+    prNumber: 5,
+    windowSeconds: 600,
+    threshold: 3,
+    botLogin: BOT,
+    nowMs: NOW,
+  };
+
+  it('counts agent review + reply comments within the window across both comment types', async () => {
+    routePaginate(
+      [agentComment(60), agentComment(120, REPLY_MARKER)],
+      [agentComment(30), agentComment(90)],
+    );
+
+    const r = await detectRateAnomaly('tok', base);
+
+    expect(r.count).toBe(4);
+    expect(r.anomalous).toBe(true);
+    expect(r.threshold).toBe(3);
+  });
+
+  it('is not anomalous below the threshold', async () => {
+    routePaginate([agentComment(60)], [agentComment(30)]);
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(2);
+    expect(r.anomalous).toBe(false);
+  });
+
+  it('ignores comments outside the window (created before windowStart)', async () => {
+    routePaginate(
+      [agentComment(60), agentComment(2000 /* 33min ago, outside 600s */)],
+      [agentComment(30)],
+    );
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(2);
+  });
+
+  it('ignores comments from other users', async () => {
+    routePaginate(
+      [{ user: { login: 'mallory' }, body: `spam ${REVIEW_MARKER}`, created_at: within(10) }],
+      [],
+    );
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(0);
+    expect(r.anomalous).toBe(false);
+  });
+
+  it('ignores agent comments that lack a review marker (e.g. ordinary chatter)', async () => {
+    routePaginate(
+      [{ user: { login: BOT }, body: 'just a plain comment', created_at: within(10) }],
+      [],
+    );
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(0);
+  });
+
+  it('counts legacy cagent markers during the migration window', async () => {
+    routePaginate(
+      [{ user: { login: BOT }, body: 'old <!-- cagent-review -->', created_at: within(10) }],
+      [],
+    );
+    const r = await detectRateAnomaly('tok', { ...base, threshold: 1 });
+    expect(r.count).toBe(1);
+    expect(r.anomalous).toBe(true);
+  });
+
+  it('passes a since timestamp derived from the window to the API', async () => {
+    routePaginate([], []);
+    await detectRateAnomaly('tok', base);
+    const issueCall = mockPaginate.mock.calls.find((c) => c[0] === mockListComments);
+    expect(issueCall?.[1]).toMatchObject({
+      owner: 'docker',
+      repo: 'repo',
+      issue_number: 5,
+      since: new Date(NOW - 600 * 1000).toISOString(),
+    });
+  });
+});
diff --git a/src/rate-limit/index.ts b/src/rate-limit/index.ts
new file mode 100644
index 0000000..36ebdb7
--- /dev/null
+++ b/src/rate-limit/index.ts
@@ -0,0 +1,200 @@
+// Copyright The Docker Agent Action authors
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * rate-limit — detect (and let the workflow prevent) abnormally frequent review
+ * activity on a single pull request.
+ *
+ * The existing per-PR cache lock (review-pr/action.yml) only stops *concurrent*
+ * reviews and only on the review path; an authorized account can still drive the
+ * bot at high frequency (each request costs an LLM run). This module adds a
+ * frequency check: it counts how many docker-agent review/reply comments were
+ * posted on the PR within a recent time window and flags the request as a rate
+ * anomaly when that count crosses a threshold. The workflow gates the expensive
+ * review step on the result, so a burst is throttled rather than run N times.
+ *
+ * Counting the bot's own marker comments (rather than raw triggers) is the
+ * signal that is reliably observable with the repo-scoped token already present,
+ * and it directly measures how hard the bot is being driven on that PR.
+ *
+ * Exported:
+ *   detectRateAnomaly(token, opts) → RateAnomalyResult
+ *
+ * CLI (invoked via dist/rate-limit.js): inputs from environment variables:
+ *   GITHUB_TOKEN / GH_TOKEN   Token with pull-request read scope
+ *   GITHUB_REPOSITORY         "owner/repo"
+ *   RATE_PR_NUMBER            PR number
+ *   RATE_WINDOW_SECONDS       Sliding window in seconds (default 600)
+ *   RATE_MAX_REQUESTS         Anomaly threshold (default 8)
+ *   RATE_BOT_LOGIN            Bot login whose comments are counted (default "docker-agent")
+ * Outputs (via @actions/core.setOutput): anomalous ("true"|"false"), count, window, threshold.
+ */
+import * as core from '@actions/core';
+import { Octokit } from '@octokit/rest';
+
+// Review markers also matched by review-pr.yml / review-pr/action.yml. Counting
+// any of these captures both full reviews and conversational replies, and the
+// legacy cagent-* markers keep older threads countable during migration.
+const REVIEW_MARKERS = [
+  '<!-- docker-agent-review -->',
+  '<!-- docker-agent-review-reply -->',
+  '<!-- cagent-review -->',
+  '<!-- cagent-review-reply -->',
+];
+
+export interface RateAnomalyOptions {
+  owner: string;
+  repo: string;
+  prNumber: number;
+  /** Sliding window in seconds. */
+  windowSeconds: number;
+  /** Inclusive count at/above which the request is flagged anomalous. */
+  threshold: number;
+  /** Login whose review/reply comments are counted. */
+  botLogin: string;
+  /** Reference "now" in epoch ms (injectable for deterministic tests). */
+  nowMs?: number;
+}
+
+export interface RateAnomalyResult {
+  count: number;
+  anomalous: boolean;
+  windowSeconds: number;
+  threshold: number;
+}
+
+interface CommentLike {
+  user?: { login?: string } | null;
+  body?: string | null;
+  created_at?: string;
+}
+
+function isAgentReviewComment(c: CommentLike, botLogin: string, windowStartMs: number): boolean {
+  if (c.user?.login !== botLogin) return false;
+  const body = c.body ?? '';
+  if (!REVIEW_MARKERS.some((m) => body.includes(m))) return false;
+  if (!c.created_at) return false;
+  const created = Date.parse(c.created_at);
+  return Number.isFinite(created) && created >= windowStartMs;
+}
+
+/**
+ * Count docker-agent review/reply comments on `prNumber` created within the last
+ * `windowSeconds`, across both issue comments and inline review comments, and
+ * decide whether the count constitutes a rate anomaly.
+ */
+export async function detectRateAnomaly(
+  token: string,
+  opts: RateAnomalyOptions,
+): Promise<RateAnomalyResult> {
+  const octokit = new Octokit({ auth: token });
+  const now = opts.nowMs ?? Date.now();
+  const windowStartMs = now - opts.windowSeconds * 1000;
+  const since = new Date(windowStartMs).toISOString();
+
+  // `since` filters server-side by updated_at; the per-comment created_at check
+  // below enforces the precise creation window. paginate() handles busy PRs.
+  const issueComments: CommentLike[] = await octokit.paginate(octokit.rest.issues.listComments, {
+    owner: opts.owner,
+    repo: opts.repo,
+    issue_number: opts.prNumber,
+    since,
+    per_page: 100,
+  });
+  const reviewComments: CommentLike[] = await octokit.paginate(
+    octokit.rest.pulls.listReviewComments,
+    {
+      owner: opts.owner,
+      repo: opts.repo,
+      pull_number: opts.prNumber,
+      since,
+      per_page: 100,
+    },
+  );
+
+  const count = [...issueComments, ...reviewComments].filter((c) =>
+    isAgentReviewComment(c, opts.botLogin, windowStartMs),
+  ).length;
+
+  return {
+    count,
+    anomalous: count >= opts.threshold,
+    windowSeconds: opts.windowSeconds,
+    threshold: opts.threshold,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+
+function parsePositiveInt(value: string | undefined, fallback: number): number {
+  const n = Number.parseInt(value ?? '', 10);
+  return Number.isInteger(n) && n > 0 ? n : fallback;
+}
+
+async function main(): Promise<void> {
+  const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? '';
+  const repository = process.env.GITHUB_REPOSITORY ?? '';
+  const prNumber = Number.parseInt(process.env.RATE_PR_NUMBER ?? '', 10);
+  const windowSeconds = parsePositiveInt(process.env.RATE_WINDOW_SECONDS, 600);
+  const threshold = parsePositiveInt(process.env.RATE_MAX_REQUESTS, 8);
+  const botLogin = process.env.RATE_BOT_LOGIN?.trim() || 'docker-agent';
+
+  // Fail open: a missing token or unparseable PR number must not block reviews.
+  if (!token || !Number.isInteger(prNumber) || prNumber <= 0) {
+    core.warning('rate-limit: missing token or PR number — skipping rate check (fail-open)');
+    core.setOutput('anomalous', 'false');
+    core.setOutput('count', '0');
+    return;
+  }
+
+  const slashIdx = repository.indexOf('/');
+  if (slashIdx < 0) {
+    core.warning(`rate-limit: invalid GITHUB_REPOSITORY '${repository}' — skipping (fail-open)`);
+    core.setOutput('anomalous', 'false');
+    core.setOutput('count', '0');
+    return;
+  }
+  const owner = repository.slice(0, slashIdx);
+  const repo = repository.slice(slashIdx + 1);
+
+  try {
+    const result = await detectRateAnomaly(token, {
+      owner,
+      repo,
+      prNumber,
+      windowSeconds,
+      threshold,
+      botLogin,
+    });
+    core.setOutput('anomalous', String(result.anomalous));
+    core.setOutput('count', String(result.count));
+    core.setOutput('window', String(result.windowSeconds));
+    core.setOutput('threshold', String(result.threshold));
+
+    if (result.anomalous) {
+      core.warning(
+        `Rate anomaly on PR #${prNumber}: ${result.count} agent review/reply comments in the ` +
+          `last ${windowSeconds}s (threshold ${threshold}). Throttling this request.`,
+      );
+    } else {
+      core.info(
+        `✅ Rate OK on PR #${prNumber}: ${result.count}/${threshold} agent comments in ${windowSeconds}s`,
+      );
+    }
+  } catch (err: unknown) {
+    // Fail open on API errors — never let the rate check itself break reviews.
+    core.warning(
+      `rate-limit check failed (fail-open): ${err instanceof Error ? err.message : String(err)}`,
+    );
+    core.setOutput('anomalous', 'false');
+    core.setOutput('count', '0');
+  }
+}
+
+if (process.argv[1]?.endsWith('rate-limit.js') && !process.env.VITEST) {
+  main().catch((err: unknown) => {
+    core.warning(`rate-limit failed: ${err instanceof Error ? err.message : String(err)}`);
+  });
+}
diff --git a/tsup.config.ts b/tsup.config.ts
index 3bb1665..f4152f9 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -23,14 +23,17 @@ const src = (name: string) => {
   return p;
 };
 const entry = {
+  'audit-log': src('audit-log'),
   'auto-filter-diff': src('auto-filter-diff'),
   'check-org-membership': src('check-org-membership'),
+  'check-staleness': src('check-staleness'),
   credentials: src('credentials'),
   'filter-diff': src('filter-diff'),
   main: src('main'),
   'mention-reply': src('mention-reply'),
   'migrate-consumer-refs': src('migrate-consumer-refs'),
   'post-mention-reply': src('post-mention-reply'),
+  'rate-limit': src('rate-limit'),
   'score-risk': src('score-risk'),
   security: src('security'),
   'signed-commit': src('signed-commit'),

From b69a05c3b6021a2a56722256c9720d4ca78b1552 Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Wed, 24 Jun 2026 13:37:46 +0200
Subject: [PATCH 2/6] fix(review-pr): run audit step before untrusted checkout
 (CodeQL TOCTOU)

CodeQL (actions/untrusted-checkout-toctou/critical) flagged the new
"Audit review request" run-step because it executed after "Checkout PR head"
checks out untrusted PR code in an issue_comment-privileged workflow. The audit
decision is fully determined before checkout (membership, rate, staleness), so
move the audit and audit-upload steps ahead of the checkout. No post-checkout
run-step is introduced by this change; behavior and recorded fields are
unchanged.
---
 .github/workflows/review-pr.yml | 125 ++++++++++++++++----------------
 1 file changed, 64 insertions(+), 61 deletions(-)

diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml
index 4fb3b37..ba6382c 100644
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -394,73 +394,15 @@ jobs:
             });
             core.setOutput('check-id', check.id);
 
-      - name: Checkout PR head
-        if: |
-          steps.membership.outputs.is_member == 'true' &&
-          steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true' &&
-          steps.rate.outputs.anomalous != 'true'
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          ref: refs/pull/${{ steps.pr.outputs.number }}/head
-
-      - name: Run PR Review
-        if: |
-          steps.membership.outputs.is_member == 'true' &&
-          steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true' &&
-          steps.rate.outputs.anomalous != 'true'
-        id: run-review
-        continue-on-error: true
-        uses: docker/docker-agent-action/review-pr@e96a4bb40cac114f64358621e1d08346c8eadc8c # v2.0.1
-        with:
-          pr-number: ${{ steps.pr.outputs.number }}
-          comment-id: ${{ inputs.comment-id || github.event.comment.id }}
-          additional-prompt: ${{ inputs.additional-prompt }}
-          add-prompt-files: ${{ inputs.add-prompt-files }}
-          exclude-paths: ${{ inputs.exclude-paths }}
-          max-diff-lines: ${{ inputs.max-diff-lines }}
-          model: ${{ inputs.model }}
-          github-token: ${{ env.GITHUB_APP_TOKEN || github.token }}
-          anthropic-api-key: ${{ env.ANTHROPIC_API_KEY_FROM_SSM || secrets.ANTHROPIC_API_KEY }}
-          openai-api-key: ${{ env.OPENAI_API_KEY_FROM_SSM || secrets.OPENAI_API_KEY }}
-          google-api-key: ${{ secrets.GOOGLE_API_KEY }}
-          aws-bearer-token-bedrock: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
-          xai-api-key: ${{ secrets.XAI_API_KEY }}
-          nebius-api-key: ${{ secrets.NEBIUS_API_KEY }}
-          mistral-api-key: ${{ secrets.MISTRAL_API_KEY }}
-          skip-auth: "true"
-
-      - name: Update check run
-        if: always() && steps.create-check.outputs.check-id != ''
-        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
-        env:
-          CHECK_ID: ${{ steps.create-check.outputs.check-id }}
-          JOB_STATUS: ${{ job.status }}
-        with:
-          github-token: ${{ github.token }}
-          script: |
-            const conclusion = process.env.JOB_STATUS === 'cancelled' ? 'cancelled' : process.env.JOB_STATUS === 'success' ? 'success' : 'failure';
-            try {
-              await github.rest.checks.update({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                check_run_id: parseInt(process.env.CHECK_ID, 10),
-                status: 'completed',
-                conclusion: conclusion,
-                completed_at: new Date().toISOString()
-              });
-            } catch (error) {
-              core.warning(`Failed to update check run: ${error.message}`);
-            }
-
       # Audit safeguard: emit one structured record per review request that joins
       # the requester, time, trigger, PR, reviewed SHA, and the authorize/deny/
       # throttle decision. Runs on the allow AND deny paths so denied, throttled,
       # and stale requests are all recorded, not just successful ones. If
       # setup-credentials failed (no dist bundle / DOCKER_AGENT_ACTION_ROOT), the
       # record is still emitted inline so even infra-failure denials are logged.
+      # Placed BEFORE "Checkout PR head" on purpose: the decision is fully known by
+      # now, and keeping this run-step ahead of the untrusted checkout avoids an
+      # execute-after-untrusted-checkout (TOCTOU) pattern.
       - name: Audit review request
         if: |
           always() &&
@@ -534,6 +476,67 @@ jobs:
           retention-days: 90
           if-no-files-found: ignore
 
+      - name: Checkout PR head
+        if: |
+          steps.membership.outputs.is_member == 'true' &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true' &&
+          steps.rate.outputs.anomalous != 'true'
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          ref: refs/pull/${{ steps.pr.outputs.number }}/head
+
+      - name: Run PR Review
+        if: |
+          steps.membership.outputs.is_member == 'true' &&
+          steps.command.outputs.is_review != 'false' &&
+          steps.draft.outputs.skip != 'true' &&
+          steps.rate.outputs.anomalous != 'true'
+        id: run-review
+        continue-on-error: true
+        uses: docker/docker-agent-action/review-pr@e96a4bb40cac114f64358621e1d08346c8eadc8c # v2.0.1
+        with:
+          pr-number: ${{ steps.pr.outputs.number }}
+          comment-id: ${{ inputs.comment-id || github.event.comment.id }}
+          additional-prompt: ${{ inputs.additional-prompt }}
+          add-prompt-files: ${{ inputs.add-prompt-files }}
+          exclude-paths: ${{ inputs.exclude-paths }}
+          max-diff-lines: ${{ inputs.max-diff-lines }}
+          model: ${{ inputs.model }}
+          github-token: ${{ env.GITHUB_APP_TOKEN || github.token }}
+          anthropic-api-key: ${{ env.ANTHROPIC_API_KEY_FROM_SSM || secrets.ANTHROPIC_API_KEY }}
+          openai-api-key: ${{ env.OPENAI_API_KEY_FROM_SSM || secrets.OPENAI_API_KEY }}
+          google-api-key: ${{ secrets.GOOGLE_API_KEY }}
+          aws-bearer-token-bedrock: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
+          xai-api-key: ${{ secrets.XAI_API_KEY }}
+          nebius-api-key: ${{ secrets.NEBIUS_API_KEY }}
+          mistral-api-key: ${{ secrets.MISTRAL_API_KEY }}
+          skip-auth: "true"
+
+      - name: Update check run
+        if: always() && steps.create-check.outputs.check-id != ''
+        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        env:
+          CHECK_ID: ${{ steps.create-check.outputs.check-id }}
+          JOB_STATUS: ${{ job.status }}
+        with:
+          github-token: ${{ github.token }}
+          script: |
+            const conclusion = process.env.JOB_STATUS === 'cancelled' ? 'cancelled' : process.env.JOB_STATUS === 'success' ? 'success' : 'failure';
+            try {
+              await github.rest.checks.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                check_run_id: parseInt(process.env.CHECK_ID, 10),
+                status: 'completed',
+                conclusion: conclusion,
+                completed_at: new Date().toISOString()
+              });
+            } catch (error) {
+              core.warning(`Failed to update check run: ${error.message}`);
+            }
+
   reply-to-feedback:
     needs: [resolve-context]
     if: |

From 7f5e3c35f05b4b0f64d101f3541fcb56cb43c745 Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Fri, 26 Jun 2026 10:10:33 +0200
Subject: [PATCH 3/6] refactor(review-pr): drop audit-log and staleness from
 abuse safeguards

Per review feedback, defer the audit log to a follow-up (GitHub artifacts are a
weak audit store; the storage backend should be designed separately) and remove
the force-push staleness check (reviews already run against refs/pull/N/head, so
the latest commit is included; incremental review is the real future want and is
tracked separately). Keeps the rate-anomaly safeguard (rate-limit module +
per-PR concurrency groups); no merge-gating step depended on the removed modules.
---
 .github/workflows/review-pr.yml               | 101 ---------
 SECURITY.md                                   |  37 +---
 review-pr/README.md                           |   8 +-
 src/audit-log/__tests__/audit-log.test.ts     | 167 ---------------
 src/audit-log/index.ts                        | 194 ------------------
 .../__tests__/check-staleness.test.ts         |  74 -------
 src/check-staleness/index.ts                  | 124 -----------
 tsup.config.ts                                |   2 -
 8 files changed, 7 insertions(+), 700 deletions(-)
 delete mode 100644 src/audit-log/__tests__/audit-log.test.ts
 delete mode 100644 src/audit-log/index.ts
 delete mode 100644 src/check-staleness/__tests__/check-staleness.test.ts
 delete mode 100644 src/check-staleness/index.ts

diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml
index a670757..6a55ed2 100644
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -348,25 +348,6 @@ jobs:
           RATE_PR_NUMBER: ${{ steps.pr.outputs.number }}
         run: node "$DOCKER_AGENT_ACTION_ROOT/dist/rate-limit.js"
 
-      # Force-push safeguard: compare the SHA the review was requested for against
-      # the current PR head. The review still runs against the latest commit
-      # (refs/pull/N/head); this records the SHA actually reviewed and posts a
-      # notice when the branch was force-pushed/rebased after the request.
-      - name: Check force-push staleness
-        id: staleness
-        if: |
-          steps.membership.outputs.is_member == 'true' &&
-          steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true' &&
-          steps.rate.outputs.anomalous != 'true'
-        continue-on-error: true   # fail-open: a staleness-check error must not block reviews
-        shell: bash
-        env:
-          GITHUB_TOKEN: ${{ env.GITHUB_APP_TOKEN || github.token }}
-          STALE_PR_NUMBER: ${{ steps.pr.outputs.number }}
-          STALE_REQUESTED_SHA: ${{ needs.resolve-context.outputs.pr-head-sha || github.event.pull_request.head.sha }}
-        run: node "$DOCKER_AGENT_ACTION_ROOT/dist/check-staleness.js"
-
       - name: Create check run
         if: |
           (steps.pr.outputs.source == 'event' || steps.pr.outputs.source == 'trigger') &&
@@ -398,88 +379,6 @@ jobs:
             });
             core.setOutput('check-id', check.id);
 
-      # Audit safeguard: emit one structured record per review request that joins
-      # the requester, time, trigger, PR, reviewed SHA, and the authorize/deny/
-      # throttle decision. Runs on the allow AND deny paths so denied, throttled,
-      # and stale requests are all recorded, not just successful ones. If
-      # setup-credentials failed (no dist bundle / DOCKER_AGENT_ACTION_ROOT), the
-      # record is still emitted inline so even infra-failure denials are logged.
-      # Placed BEFORE "Checkout PR head" on purpose: the decision is fully known by
-      # now, and keeping this run-step ahead of the untrusted checkout avoids an
-      # execute-after-untrusted-checkout (TOCTOU) pattern.
-      - name: Audit review request
-        if: |
-          always() &&
-          steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true'
-        continue-on-error: true
-        shell: bash
-        env:
-          IS_MEMBER: ${{ steps.membership.outputs.is_member }}
-          RATE_ANOMALOUS: ${{ steps.rate.outputs.anomalous }}
-          STALE: ${{ steps.staleness.outputs.stale }}
-          CURRENT_SHA: ${{ steps.staleness.outputs.current-sha }}
-          REQUESTED_SHA: ${{ needs.resolve-context.outputs.pr-head-sha || github.event.pull_request.head.sha }}
-          USER_REQUESTED: ${{ steps.trigger-type.outputs.user_requested }}
-          AUDIT_ACTOR: ${{ github.event.comment.user.login || github.event.sender.login || github.actor }}
-          AUDIT_PR_NUMBER: ${{ steps.pr.outputs.number }}
-          AUDIT_EVENT: ${{ github.event_name }}
-          REVIEW_AUDIT_FILE: ${{ runner.temp }}/review-audit/review-audit.jsonl
-        run: |
-          if [ "$IS_MEMBER" != "true" ]; then
-            DECISION=denied
-            REASON="requester is not a docker org member"
-          elif [ "$RATE_ANOMALOUS" = "true" ]; then
-            DECISION=throttled
-            REASON="rate anomaly: too many recent review requests on this PR"
-          else
-            DECISION=authorized
-            if [ "$STALE" = "true" ]; then
-              REASON="docker org member (PR head moved after request — reviewing latest commit)"
-            else
-              REASON="docker org member"
-            fi
-          fi
-          export AUDIT_DECISION="$DECISION"
-          export AUDIT_REASON="$REASON"
-          export AUDIT_TRIGGER="$([ "$USER_REQUESTED" = "true" ] && echo user-requested || echo automatic)"
-          export AUDIT_HEAD_SHA="${CURRENT_SHA:-$REQUESTED_SHA}"
-          export AUDIT_REQUESTED_SHA="$REQUESTED_SHA"
-
-          # If setup-credentials failed (OIDC/Secrets Manager misconfig), the dist
-          # bundle and DOCKER_AGENT_ACTION_ROOT are never set. Still record the
-          # denial inline so the "log every request, including denials" guarantee
-          # holds even on infra-failure paths.
-          if [ -z "$DOCKER_AGENT_ACTION_ROOT" ] || [ ! -f "$DOCKER_AGENT_ACTION_ROOT/dist/audit-log.js" ]; then
-            TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
-            REC=$(jq -nc \
-              --arg ts "$TS" --arg event "$AUDIT_EVENT" --arg trigger "$AUDIT_TRIGGER" \
-              --arg actor "${AUDIT_ACTOR:-unknown}" --arg repo "$GITHUB_REPOSITORY" \
-              --arg pr "$AUDIT_PR_NUMBER" --arg head "$AUDIT_HEAD_SHA" \
-              --arg requested "$AUDIT_REQUESTED_SHA" --arg decision "$AUDIT_DECISION" \
-              --arg reason "$AUDIT_REASON (audit-log bundle unavailable — emitted inline)" \
-              '{timestamp:$ts,event:$event,trigger:$trigger,actor:$actor,repository:$repo,prNumber:$pr,headSha:$head,requestedSha:$requested,decision:$decision,reason:$reason}')
-            echo "::notice title=Review request audit::[review-request-audit] $REC"
-            mkdir -p "$(dirname "$REVIEW_AUDIT_FILE")"
-            printf '%s\n' "$REC" >> "$REVIEW_AUDIT_FILE" || true
-            exit 0
-          fi
-
-          node "$DOCKER_AGENT_ACTION_ROOT/dist/audit-log.js" || echo "::warning::audit-log failed"
-
-      - name: Upload review audit log
-        if: |
-          always() &&
-          steps.command.outputs.is_review != 'false' &&
-          steps.draft.outputs.skip != 'true'
-        continue-on-error: true
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
-        with:
-          name: pr-review-audit-${{ github.run_id }}-${{ github.run_attempt }}
-          path: ${{ runner.temp }}/review-audit/
-          retention-days: 90
-          if-no-files-found: ignore
-
       - name: Checkout PR head
         if: |
           steps.membership.outputs.is_member == 'true' &&
diff --git a/SECURITY.md b/SECURITY.md
index 8f648c3..f67c940 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -26,7 +26,7 @@ This action includes **built-in security features for all agent executions**:
    - **Suspicious patterns** (strip + warn): Behavioral/natural-language injection attempts ("ignore previous instructions", "system mode", "reveal the token", base64/hex obfuscation, etc.) — matching lines are removed from the prompt before it reaches the agent
    - **Medium-risk patterns** (warn only): API key variable names in configuration (`ANTHROPIC_API_KEY`, `GITHUB_TOKEN`, etc.)
 
-4. **Review-Request Abuse Safeguards** — the comment/event-triggered PR review pipeline is hardened against abuse on every trigger path: org-membership validation, a structured audit log of every request (including denials), rate-anomaly throttling, and force-push staleness handling. See [Review-Request Abuse Safeguards](#review-request-abuse-safeguards).
+4. **Review-Request Abuse Safeguards** — the comment/event-triggered PR review pipeline is hardened against abuse on every trigger path: org-membership validation and rate-anomaly throttling. See [Review-Request Abuse Safeguards](#review-request-abuse-safeguards).
 
 ## Security Architecture
 
@@ -75,7 +75,7 @@ The action implements a defense-in-depth approach:
 
 The PR review pipeline (`.github/workflows/review-pr.yml` and the `review-pr/`
 composite actions) is comment- and event-triggered, which makes it the main
-abuse vector for cost/spam. Four safeguards harden the review-request flow on
+abuse vector for cost/spam. Two safeguards harden the review-request flow on
 every trigger path (`review_requested`, the deprecated `/review` comment,
 `@docker-agent` mentions, automatic `opened`/`synchronize`, and the
 `workflow_run` fork path).
@@ -98,23 +98,7 @@ then the trusted requester for `review_requested`), resolving the PR author live
 via the API so the directly-wired `pull_request` path verifies the author
 instead of an empty comment author.
 
-### 2. All review requests are logged
-
-`src/audit-log` emits one structured audit record per review request — on the
-allow path **and** on every denial/throttle path — joining the fields needed to
-investigate abuse after the fact: requester login, ISO-8601 timestamp, trigger
-type, repository, PR number, reviewed head SHA (and requested SHA when they
-differ), and the `authorized | denied | throttled | skipped | stale` decision
-with a reason. Each record is emitted three ways so it survives even when raw
-run logs are gone:
-
-- a `core.notice` annotation prefixed `[review-request-audit]` (queryable via the
-  annotations API and greppable in logs),
-- a line appended to the job summary (`$GITHUB_STEP_SUMMARY`), and
-- an append-only JSONL line uploaded as the `pr-review-audit-*` artifact
-  (90-day retention).
-
-### 3. Rate anomalies are detected and throttled
+### 2. Rate anomalies are detected and throttled
 
 Authorization gates *who* may trigger a review, not *how often*. Three layers
 bound request frequency:
@@ -125,22 +109,11 @@ bound request frequency:
   intent so a quick conversational reply is never queued behind a long review.
 - **`src/rate-limit`** counts docker-agent review/reply comments posted on the PR
   within a sliding window (default 600 s) and, when the count crosses a threshold
-  (default 8), flags a rate anomaly. The review job skips the expensive review and
-  records a `throttled` audit entry. The check **fails open** so an API error
-  never blocks a legitimate review.
+  (default 8), flags a rate anomaly. The review job skips the expensive review.
+  The check **fails open** so an API error never blocks a legitimate review.
 - The existing in-action **cache lock** (`pr-review-lock-<repo>-<pr>-*`, 600 s
   TTL) prevents concurrent reviews from racing on the same PR.
 
-### 4. Force-pushed / rebased commits are handled
-
-A review always checks out `refs/pull/N/head` (the current head), but the head
-SHA captured when the review was requested may differ after a force-push/rebase.
-`src/check-staleness` re-fetches the current head and compares it to the
-requested SHA. The review proceeds against the latest commit (the freshest code
-is what should be reviewed); the SHA actually reviewed is recorded in the audit
-log and a `core.notice` is posted when the branch moved, so a force-push during
-the request window is visible rather than silent.
-
 ## Security Modules
 
 All security logic lives under `src/security/` and is compiled into `dist/security.js` by
diff --git a/review-pr/README.md b/review-pr/README.md
index 4eccbf9..e572b5c 100644
--- a/review-pr/README.md
+++ b/review-pr/README.md
@@ -191,10 +191,8 @@ with:
 >
 > 1. **Verifies org membership** before every review. Auto-review checks the **PR author** (so only org members' PRs are reviewed automatically); a **requested review** checks the **requester**, so a maintainer can pull an external contributor's PR into review on demand; `/review` checks the commenter
 > 2. **Prevents bot cascades** — replies from bots (except `docker-agent`) are ignored
-> 3. **Logs every review request** — one structured audit record per request (requester, time, trigger, PR, reviewed SHA, allow/deny/throttle decision) is emitted as a notice, a job-summary line, and a 90-day audit artifact, including on denial paths
-> 4. **Throttles rate anomalies** — per-PR `concurrency:` groups collapse same-trigger bursts, and a rate-limit check skips the review when too many requests land on one PR in a short window
-> 5. **Handles force-pushes** — when a PR is rebased/force-pushed after a review is requested, the review runs against the latest commit and the actually-reviewed SHA is recorded with a notice
-> 6. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status
+> 3. **Throttles rate anomalies** — per-PR `concurrency:` groups collapse same-trigger bursts, and a rate-limit check skips the review when too many requests land on one PR in a short window
+> 4. **Fork PRs work automatically** with the two-workflow setup — the trigger → `workflow_run` pattern provides OIDC/secret access regardless of fork status
 
 ### What you don't need to add
 
@@ -208,8 +206,6 @@ The workflow YAML examples above are the complete, recommended setup. The reusab
 | **Draft PR skipping** | Draft PRs are skipped internally — no caller condition needed. |
 | **Concurrent review guard** | A cache-based lock (`pr-review-lock-<repo>-<pr>-*`) prevents duplicate reviews from racing on the same PR. |
 | **Rate-anomaly throttling** | Per-PR `concurrency:` groups serialize same-trigger bursts, and a rate-limit check skips the review when too many docker-agent review/reply comments land on one PR within the window. No caller configuration needed. |
-| **Review-request audit log** | Every request is recorded (notice + job summary + 90-day `pr-review-audit-*` artifact) with requester, trigger, PR, reviewed SHA, and the allow/deny/throttle decision — including denied and throttled requests. |
-| **Force-push handling** | The SHA a review was requested for is compared to the current head; the review runs against the latest commit and the reviewed SHA is recorded so a force-push mid-flight is visible rather than silent. |
 
 **The only decision callers make** is which setup pattern to use: 1-workflow for same-repo PRs, 2-workflow for repos that accept fork PRs. That distinction is the caller's responsibility because it controls which event path delivers OIDC credentials to the reusable workflow.
 
diff --git a/src/audit-log/__tests__/audit-log.test.ts b/src/audit-log/__tests__/audit-log.test.ts
deleted file mode 100644
index 230592d..0000000
--- a/src/audit-log/__tests__/audit-log.test.ts
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright The Docker Agent Action authors
-// SPDX-License-Identifier: Apache-2.0
-
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-
-const { mockNotice, mockWarning, mockAddRaw, mockAppendFileSync, mockMkdirSync } = vi.hoisted(
-  () => ({
-    mockNotice: vi.fn(),
-    mockWarning: vi.fn(),
-    mockAddRaw: vi.fn(),
-    mockAppendFileSync: vi.fn(),
-    mockMkdirSync: vi.fn(),
-  }),
-);
-
-vi.mock('@actions/core', () => ({
-  notice: mockNotice,
-  warning: mockWarning,
-  info: vi.fn(),
-  setOutput: vi.fn(),
-  summary: { addRaw: mockAddRaw, write: vi.fn().mockResolvedValue(undefined) },
-}));
-
-vi.mock('node:fs', () => ({
-  appendFileSync: mockAppendFileSync,
-  mkdirSync: mockMkdirSync,
-}));
-
-import {
-  buildAuditRecord,
-  emitAuditRecord,
-  formatNotice,
-  formatSummaryLine,
-  resolveAuditFilePath,
-} from '../index.js';
-
-beforeEach(() => {
-  vi.clearAllMocks();
-});
-
-describe('buildAuditRecord', () => {
-  it('maps environment variables into a full record', () => {
-    const r = buildAuditRecord({
-      AUDIT_TIMESTAMP: '2026-06-24T10:00:00.000Z',
-      AUDIT_EVENT: 'pull_request',
-      AUDIT_TRIGGER: 'review_requested',
-      AUDIT_ACTOR: 'alice',
-      AUDIT_PR_NUMBER: '42',
-      AUDIT_HEAD_SHA: 'abc123def456',
-      AUDIT_DECISION: 'authorized',
-      AUDIT_REASON: 'org member',
-      GITHUB_REPOSITORY: 'docker/repo',
-      GITHUB_RUN_ID: '999',
-      GITHUB_SERVER_URL: 'https://github.com',
-    });
-
-    expect(r).toMatchObject({
-      timestamp: '2026-06-24T10:00:00.000Z',
-      event: 'pull_request',
-      trigger: 'review_requested',
-      actor: 'alice',
-      repository: 'docker/repo',
-      prNumber: '42',
-      headSha: 'abc123def456',
-      decision: 'authorized',
-      reason: 'org member',
-      runId: '999',
-      runUrl: 'https://github.com/docker/repo/actions/runs/999',
-    });
-  });
-
-  it('falls back to safe defaults for missing fields', () => {
-    const r = buildAuditRecord({});
-    expect(r.trigger).toBe('unknown');
-    expect(r.actor).toBe('unknown');
-    expect(r.prNumber).toBe('');
-    expect(r.runUrl).toBe('');
-    // timestamp defaults to a valid ISO string
-    expect(Number.isNaN(Date.parse(r.timestamp))).toBe(false);
-  });
-
-  it('coerces an invalid decision to "skipped"', () => {
-    expect(buildAuditRecord({ AUDIT_DECISION: 'bogus' }).decision).toBe('skipped');
-    expect(buildAuditRecord({ AUDIT_DECISION: 'denied' }).decision).toBe('denied');
-    expect(buildAuditRecord({ AUDIT_DECISION: 'throttled' }).decision).toBe('throttled');
-  });
-
-  it('falls back to GITHUB_EVENT_NAME when AUDIT_EVENT is unset', () => {
-    expect(buildAuditRecord({ GITHUB_EVENT_NAME: 'issue_comment' }).event).toBe('issue_comment');
-  });
-
-  it('omits runUrl when repository or run id is missing', () => {
-    expect(buildAuditRecord({ GITHUB_REPOSITORY: 'docker/repo' }).runUrl).toBe('');
-    expect(buildAuditRecord({ GITHUB_RUN_ID: '1' }).runUrl).toBe('');
-  });
-});
-
-describe('resolveAuditFilePath', () => {
-  it('prefers REVIEW_AUDIT_FILE', () => {
-    expect(resolveAuditFilePath({ REVIEW_AUDIT_FILE: '/custom/audit.jsonl' })).toBe(
-      '/custom/audit.jsonl',
-    );
-  });
-  it('falls back to RUNNER_TEMP', () => {
-    expect(resolveAuditFilePath({ RUNNER_TEMP: '/runner/tmp' })).toBe(
-      '/runner/tmp/review-audit.jsonl',
-    );
-  });
-  it('falls back to /tmp', () => {
-    expect(resolveAuditFilePath({})).toBe('/tmp/review-audit.jsonl');
-  });
-});
-
-describe('formatters', () => {
-  const record = buildAuditRecord({
-    AUDIT_TIMESTAMP: '2026-06-24T10:00:00.000Z',
-    AUDIT_TRIGGER: 'mention',
-    AUDIT_ACTOR: 'bob',
-    AUDIT_PR_NUMBER: '7',
-    AUDIT_HEAD_SHA: 'deadbeefcafe',
-    AUDIT_DECISION: 'denied',
-    AUDIT_REASON: 'not an org member',
-  });
-
-  it('formatNotice prefixes with a greppable tag and embeds JSON', () => {
-    const msg = formatNotice(record);
-    expect(msg.startsWith('[review-request-audit] ')).toBe(true);
-    const parsed = JSON.parse(msg.slice('[review-request-audit] '.length));
-    expect(parsed.actor).toBe('bob');
-    expect(parsed.decision).toBe('denied');
-  });
-
-  it('formatSummaryLine renders a concise markdown bullet with a short SHA', () => {
-    const line = formatSummaryLine(record);
-    expect(line).toContain('**denied**');
-    expect(line).toContain('@bob');
-    expect(line).toContain('PR #7');
-    expect(line).toContain('`deadbeef`'); // short SHA
-    expect(line).toContain('not an org member');
-  });
-});
-
-describe('emitAuditRecord', () => {
-  it('emits a notice, a summary line, and appends JSONL to the audit file', () => {
-    const record = buildAuditRecord({ AUDIT_ACTOR: 'carol', AUDIT_DECISION: 'authorized' });
-    emitAuditRecord(record, { auditFilePath: '/tmp/x/review-audit.jsonl' });
-
-    expect(mockNotice).toHaveBeenCalledTimes(1);
-    expect(mockAddRaw).toHaveBeenCalledTimes(1);
-    expect(mockMkdirSync).toHaveBeenCalledWith('/tmp/x', { recursive: true });
-    expect(mockAppendFileSync).toHaveBeenCalledTimes(1);
-    const [path, line] = mockAppendFileSync.mock.calls[0];
-    expect(path).toBe('/tmp/x/review-audit.jsonl');
-    expect(JSON.parse((line as string).trim()).actor).toBe('carol');
-  });
-
-  it('downgrades a file-write failure to a warning (never throws)', () => {
-    mockAppendFileSync.mockImplementationOnce(() => {
-      throw new Error('disk full');
-    });
-    const record = buildAuditRecord({ AUDIT_ACTOR: 'dave' });
-    expect(() => emitAuditRecord(record, { auditFilePath: '/tmp/y.jsonl' })).not.toThrow();
-    expect(mockWarning).toHaveBeenCalledTimes(1);
-    // The notice still fired despite the persistence failure.
-    expect(mockNotice).toHaveBeenCalledTimes(1);
-  });
-});
diff --git a/src/audit-log/index.ts b/src/audit-log/index.ts
deleted file mode 100644
index fc3632d..0000000
--- a/src/audit-log/index.ts
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright The Docker Agent Action authors
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * audit-log — emit a structured audit record for every review request.
- *
- * This is an abuse safeguard: GitHub Actions run logs are not a reliable audit
- * store (short retention, no query/export, deletable by repo admins), so each
- * review request — on every trigger path and crucially on every *denial* path —
- * is recorded as a single correlated record that joins the fields needed to
- * investigate abuse after the fact: who triggered it, when, via which trigger,
- * on which PR/SHA, and what the authorization decision was.
- *
- * Each invocation emits the record three ways:
- *   1. core.notice  — a queryable workflow annotation (survives in the run UI
- *                     and via the GitHub API even when raw logs are gone)
- *   2. job summary  — a human-readable line appended to $GITHUB_STEP_SUMMARY
- *   3. JSONL file   — an append-only line the workflow uploads as a
- *                     long-retention artifact for durable, machine-readable audit
- *
- * CLI (invoked as a shell run step via dist/audit-log.js):
- *   All inputs are read from environment variables:
- *     AUDIT_TRIGGER       Trigger type (e.g. "review_requested", "mention", "automatic")
- *     AUDIT_ACTOR         Login of the user who triggered the request
- *     AUDIT_DECISION      "authorized" | "denied" | "skipped" | "throttled" | "stale"
- *     AUDIT_REASON        Free-text reason for the decision (optional)
- *     AUDIT_PR_NUMBER     PR number (optional)
- *     AUDIT_HEAD_SHA      Reviewed head SHA (optional)
- *     AUDIT_REQUESTED_SHA SHA the review was requested for, if different (optional)
- *     AUDIT_EVENT         GitHub event name (optional; defaults to GITHUB_EVENT_NAME)
- *     AUDIT_TIMESTAMP     ISO-8601 timestamp (optional; defaults to now)
- *     REVIEW_AUDIT_FILE   Path for the JSONL audit file (optional; defaults to
- *                         $RUNNER_TEMP/review-audit.jsonl, else /tmp/review-audit.jsonl)
- *     GITHUB_REPOSITORY   "owner/repo" (standard GitHub Actions env var)
- *     GITHUB_RUN_ID       Run id (standard GitHub Actions env var)
- *     GITHUB_SERVER_URL   Server url (standard GitHub Actions env var)
- *
- * Guard: the CLI entry point only executes when process.argv[1] ends with
- * "audit-log.js" and VITEST is not set, so importing this module as a library
- * never triggers a write.
- */
-import { appendFileSync, mkdirSync } from 'node:fs';
-import { dirname } from 'node:path';
-import * as core from '@actions/core';
-
-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
-/** Authorization / handling outcome recorded for a review request. */
-export type AuditDecision = 'authorized' | 'denied' | 'skipped' | 'throttled' | 'stale';
-
-const VALID_DECISIONS: readonly AuditDecision[] = [
-  'authorized',
-  'denied',
-  'skipped',
-  'throttled',
-  'stale',
-];
-
-export interface ReviewAuditRecord {
-  /** ISO-8601 timestamp of when the request was processed. */
-  timestamp: string;
-  /** GitHub event name (e.g. "pull_request", "issue_comment"). */
-  event: string;
-  /** Logical trigger type (e.g. "review_requested", "mention", "automatic"). */
-  trigger: string;
-  /** Login of the user who triggered the request ("unknown" when unresolved). */
-  actor: string;
-  /** "owner/repo". */
-  repository: string;
-  /** PR number, or empty string when not applicable. */
-  prNumber: string;
-  /** Reviewed head SHA, or empty string. */
-  headSha: string;
-  /** SHA the review was requested for, when it differs from headSha. */
-  requestedSha: string;
-  /** Authorization / handling outcome. */
-  decision: AuditDecision;
-  /** Human-readable reason for the decision. */
-  reason: string;
-  /** Workflow run id. */
-  runId: string;
-  /** Direct URL to the workflow run, when derivable. */
-  runUrl: string;
-}
-
-// ---------------------------------------------------------------------------
-// Pure builder
-// ---------------------------------------------------------------------------
-
-/**
- * Build a {@link ReviewAuditRecord} from environment variables. Pure: performs
- * no I/O. Unknown/empty fields fall back to safe defaults so a partially-wired
- * caller still produces a usable record rather than throwing.
- */
-export function buildAuditRecord(env: NodeJS.ProcessEnv): ReviewAuditRecord {
-  const repository = env.GITHUB_REPOSITORY ?? '';
-  const runId = env.GITHUB_RUN_ID ?? '';
-  const serverUrl = env.GITHUB_SERVER_URL ?? 'https://github.com';
-
-  const decisionRaw = (env.AUDIT_DECISION ?? '').trim() as AuditDecision;
-  const decision: AuditDecision = VALID_DECISIONS.includes(decisionRaw) ? decisionRaw : 'skipped';
-
-  return {
-    timestamp: env.AUDIT_TIMESTAMP?.trim() || new Date().toISOString(),
-    event: (env.AUDIT_EVENT ?? env.GITHUB_EVENT_NAME ?? '').trim(),
-    trigger: (env.AUDIT_TRIGGER ?? '').trim() || 'unknown',
-    actor: (env.AUDIT_ACTOR ?? '').trim() || 'unknown',
-    repository,
-    prNumber: (env.AUDIT_PR_NUMBER ?? '').trim(),
-    headSha: (env.AUDIT_HEAD_SHA ?? '').trim(),
-    requestedSha: (env.AUDIT_REQUESTED_SHA ?? '').trim(),
-    decision,
-    reason: (env.AUDIT_REASON ?? '').trim(),
-    runId,
-    runUrl: repository && runId ? `${serverUrl}/${repository}/actions/runs/${runId}` : '',
-  };
-}
-
-// ---------------------------------------------------------------------------
-// Formatters (pure)
-// ---------------------------------------------------------------------------
-
-/** Single-line annotation message. The `[review-request-audit]` prefix makes
- *  records greppable in logs and filterable via the annotations API. */
-export function formatNotice(record: ReviewAuditRecord): string {
-  return `[review-request-audit] ${JSON.stringify(record)}`;
-}
-
-/** One markdown bullet for the job summary. SHAs are short-formed for brevity. */
-export function formatSummaryLine(record: ReviewAuditRecord): string {
-  const sha = record.headSha ? ` \`${record.headSha.slice(0, 8)}\`` : '';
-  const pr = record.prNumber ? ` PR #${record.prNumber}` : '';
-  const reason = record.reason ? ` — ${record.reason}` : '';
-  return `- \`${record.timestamp}\` **${record.decision}** ${record.trigger} by @${record.actor}${pr}${sha}${reason}`;
-}
-
-/** Resolve the JSONL audit file path, honoring REVIEW_AUDIT_FILE then RUNNER_TEMP. */
-export function resolveAuditFilePath(env: NodeJS.ProcessEnv): string {
-  if (env.REVIEW_AUDIT_FILE?.trim()) return env.REVIEW_AUDIT_FILE.trim();
-  const base = env.RUNNER_TEMP?.trim() || '/tmp';
-  return `${base}/review-audit.jsonl`;
-}
-
-// ---------------------------------------------------------------------------
-// Emitter (side effects)
-// ---------------------------------------------------------------------------
-
-export interface EmitOptions {
-  /** Path to append the JSONL record to. */
-  auditFilePath: string;
-}
-
-/**
- * Emit the record as a notice + job-summary line + JSONL append. Persisting the
- * record is best-effort: a failed file append must never block a review, so it
- * downgrades to a warning rather than throwing.
- */
-export function emitAuditRecord(record: ReviewAuditRecord, opts: EmitOptions): void {
-  core.notice(formatNotice(record), { title: 'Review request audit' });
-  core.summary.addRaw(`${formatSummaryLine(record)}\n`, false);
-
-  try {
-    mkdirSync(dirname(opts.auditFilePath), { recursive: true });
-    appendFileSync(opts.auditFilePath, `${JSON.stringify(record)}\n`);
-  } catch (err: unknown) {
-    core.warning(
-      `Failed to persist audit record to ${opts.auditFilePath}: ${
-        err instanceof Error ? err.message : String(err)
-      }`,
-    );
-  }
-}
-
-// ---------------------------------------------------------------------------
-// CLI entry point
-// ---------------------------------------------------------------------------
-
-async function main(): Promise<void> {
-  const record = buildAuditRecord(process.env);
-  emitAuditRecord(record, { auditFilePath: resolveAuditFilePath(process.env) });
-  // Surface the chosen path so the workflow can upload it as an artifact.
-  core.setOutput('audit-file', resolveAuditFilePath(process.env));
-  await core.summary.write();
-}
-
-// Guard: only run as CLI when invoked directly as dist/audit-log.js.
-if (process.argv[1]?.endsWith('audit-log.js') && !process.env.VITEST) {
-  main().catch((err: unknown) => {
-    // Audit logging must never fail a review; warn and exit 0.
-    core.warning(`audit-log failed: ${err instanceof Error ? err.message : String(err)}`);
-  });
-}
diff --git a/src/check-staleness/__tests__/check-staleness.test.ts b/src/check-staleness/__tests__/check-staleness.test.ts
deleted file mode 100644
index 27bd778..0000000
--- a/src/check-staleness/__tests__/check-staleness.test.ts
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright The Docker Agent Action authors
-// SPDX-License-Identifier: Apache-2.0
-
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-
-vi.mock('@actions/core');
-
-const { mockGetPull, MockOctokit, constructorTokens } = vi.hoisted(() => {
-  const mockGetPull = vi.fn();
-  const constructorTokens: string[] = [];
-  class MockOctokit {
-    constructor({ auth }: { auth: string }) {
-      constructorTokens.push(auth);
-    }
-    rest = { pulls: { get: mockGetPull } };
-  }
-  return { mockGetPull, MockOctokit, constructorTokens };
-});
-
-vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
-
-import { checkStaleness } from '../index.js';
-
-beforeEach(() => {
-  vi.clearAllMocks();
-  constructorTokens.length = 0;
-});
-
-const opts = { owner: 'docker', repo: 'repo', prNumber: 9 };
-
-describe('checkStaleness', () => {
-  it('flags stale when the requested SHA differs from current head', async () => {
-    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'newsha111' } } });
-
-    const r = await checkStaleness('tok', { ...opts, requestedSha: 'oldsha000' });
-
-    expect(r).toEqual({ requestedSha: 'oldsha000', currentSha: 'newsha111', stale: true });
-    expect(mockGetPull).toHaveBeenCalledWith({ owner: 'docker', repo: 'repo', pull_number: 9 });
-    expect(constructorTokens).toEqual(['tok']);
-  });
-
-  it('is not stale when the requested SHA matches current head', async () => {
-    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'samesha' } } });
-    const r = await checkStaleness('tok', { ...opts, requestedSha: 'samesha' });
-    expect(r.stale).toBe(false);
-  });
-
-  it('is not stale (fail-open) when the requested SHA is empty', async () => {
-    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'newsha' } } });
-    const r = await checkStaleness('tok', { ...opts, requestedSha: '' });
-    expect(r).toEqual({ requestedSha: '', currentSha: 'newsha', stale: false });
-  });
-
-  it('trims surrounding whitespace on the requested SHA before comparing', async () => {
-    mockGetPull.mockResolvedValueOnce({ data: { head: { sha: 'abc' } } });
-    const r = await checkStaleness('tok', { ...opts, requestedSha: '  abc \n' });
-    expect(r.stale).toBe(false);
-    expect(r.requestedSha).toBe('abc');
-  });
-
-  it('is not stale when current head is unknown (missing head.sha)', async () => {
-    mockGetPull.mockResolvedValueOnce({ data: { head: {} } });
-    const r = await checkStaleness('tok', { ...opts, requestedSha: 'oldsha' });
-    expect(r.currentSha).toBe('');
-    expect(r.stale).toBe(false);
-  });
-
-  it('propagates API errors to the caller', async () => {
-    mockGetPull.mockRejectedValueOnce(Object.assign(new Error('Not Found'), { status: 404 }));
-    await expect(checkStaleness('tok', { ...opts, requestedSha: 'x' })).rejects.toThrow(
-      'Not Found',
-    );
-  });
-});
diff --git a/src/check-staleness/index.ts b/src/check-staleness/index.ts
deleted file mode 100644
index b48cbb2..0000000
--- a/src/check-staleness/index.ts
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright The Docker Agent Action authors
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * check-staleness — detect when a PR was force-pushed / rebased between the
- * moment a review was requested and the moment it actually runs.
- *
- * Review requests capture the head SHA at trigger time (e.g. pr_head_sha.txt in
- * the trigger artifact, or github.event.pull_request.head.sha on the direct
- * path), but every review checks out refs/pull/N/head, which always resolves to
- * the *current* head. If the branch was force-pushed in between, the review
- * silently runs against a different commit than the one requested, and the
- * posted review / check run end up pinned to a SHA nobody asked to review.
- *
- * This module re-fetches the current head SHA and compares it to the requested
- * SHA so the workflow can record the SHA actually reviewed and post a notice
- * when the two diverge. The review proceeds against current head (the freshest
- * commit is what should be reviewed) — the safeguard is detection + an honest
- * record, not blocking.
- *
- * Exported:
- *   checkStaleness(token, opts) → StalenessResult
- *
- * CLI (invoked via dist/check-staleness.js): inputs from environment variables:
- *   GITHUB_TOKEN / GH_TOKEN   Token with pull-request read scope
- *   GITHUB_REPOSITORY         "owner/repo"
- *   STALE_PR_NUMBER           PR number
- *   STALE_REQUESTED_SHA       Head SHA captured when the review was requested
- * Outputs (via @actions/core.setOutput):
- *   stale ("true"|"false"), current-sha, requested-sha
- */
-import * as core from '@actions/core';
-import { Octokit } from '@octokit/rest';
-
-export interface StalenessOptions {
-  owner: string;
-  repo: string;
-  prNumber: number;
-  /** Head SHA captured when the review was requested ("" when unknown). */
-  requestedSha: string;
-}
-
-export interface StalenessResult {
-  requestedSha: string;
-  currentSha: string;
-  /** True only when both SHAs are known and differ. */
-  stale: boolean;
-}
-
-/**
- * Fetch the current PR head SHA and compare it to `requestedSha`. When the
- * requested SHA is empty/unknown, staleness cannot be determined and `stale` is
- * false (fail-open: never block on missing data).
- */
-export async function checkStaleness(
-  token: string,
-  opts: StalenessOptions,
-): Promise<StalenessResult> {
-  const octokit = new Octokit({ auth: token });
-  const { data: pr } = await octokit.rest.pulls.get({
-    owner: opts.owner,
-    repo: opts.repo,
-    pull_number: opts.prNumber,
-  });
-  const currentSha = pr.head?.sha ?? '';
-  const requestedSha = opts.requestedSha.trim();
-  const stale = requestedSha !== '' && currentSha !== '' && requestedSha !== currentSha;
-  return { requestedSha, currentSha, stale };
-}
-
-// ---------------------------------------------------------------------------
-// CLI entry point
-// ---------------------------------------------------------------------------
-
-async function main(): Promise<void> {
-  const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? '';
-  const repository = process.env.GITHUB_REPOSITORY ?? '';
-  const prNumber = Number.parseInt(process.env.STALE_PR_NUMBER ?? '', 10);
-  const requestedSha = process.env.STALE_REQUESTED_SHA ?? '';
-
-  if (!token || !Number.isInteger(prNumber) || prNumber <= 0) {
-    core.warning('check-staleness: missing token or PR number — skipping (fail-open)');
-    core.setOutput('stale', 'false');
-    return;
-  }
-
-  const slashIdx = repository.indexOf('/');
-  if (slashIdx < 0) {
-    core.warning(`check-staleness: invalid GITHUB_REPOSITORY '${repository}' — skipping`);
-    core.setOutput('stale', 'false');
-    return;
-  }
-  const owner = repository.slice(0, slashIdx);
-  const repo = repository.slice(slashIdx + 1);
-
-  try {
-    const result = await checkStaleness(token, { owner, repo, prNumber, requestedSha });
-    core.setOutput('stale', String(result.stale));
-    core.setOutput('current-sha', result.currentSha);
-    core.setOutput('requested-sha', result.requestedSha);
-
-    if (result.stale) {
-      core.notice(
-        `PR #${prNumber} was updated after the review was requested: requested ` +
-          `${result.requestedSha.slice(0, 8)}, now reviewing ${result.currentSha.slice(0, 8)}. ` +
-          `The review will run against the latest commit.`,
-        { title: 'Force-push detected — reviewing latest commit' },
-      );
-    } else {
-      core.info(`✅ PR #${prNumber} head is current (${result.currentSha.slice(0, 8)})`);
-    }
-  } catch (err: unknown) {
-    core.warning(
-      `check-staleness failed (fail-open): ${err instanceof Error ? err.message : String(err)}`,
-    );
-    core.setOutput('stale', 'false');
-  }
-}
-
-if (process.argv[1]?.endsWith('check-staleness.js') && !process.env.VITEST) {
-  main().catch((err: unknown) => {
-    core.warning(`check-staleness failed: ${err instanceof Error ? err.message : String(err)}`);
-  });
-}
diff --git a/tsup.config.ts b/tsup.config.ts
index f35003f..48dc2d3 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -23,10 +23,8 @@ const src = (name: string) => {
   return p;
 };
 const entry = {
-  'audit-log': src('audit-log'),
   'auto-filter-diff': src('auto-filter-diff'),
   'check-org-membership': src('check-org-membership'),
-  'check-staleness': src('check-staleness'),
   credentials: src('credentials'),
   'filter-diff': src('filter-diff'),
   main: src('main'),

From 9576ee0b7378b1af370349acd63e2d36f8959f22 Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Fri, 26 Jun 2026 15:43:54 +0200
Subject: [PATCH 4/6] fix(rate-limit): count full PR reviews in the
 rate-anomaly window

The rate-anomaly safeguard only queried issues.listComments and
pulls.listReviewComments, but the bot posts every review through the
Reviews API with no inline marker on the body. Zero-finding APPROVEs and
the timeout/error/LGTM fallbacks therefore counted as 0, and a findings
review counted as N (one per inline comment) instead of 1, so the counter
measured the wrong unit.

Count one unit per LLM run instead:

- Reviews are counted from pulls.listReviews by bot author with a
  non-empty body. The non-empty-body check skips the empty-body review
  entries GitHub creates for standalone inline comments and replies, so a
  review run is counted exactly once and an inline reply is not
  double-counted.
- Replies are counted from the comment endpoints on the -reply markers
  only; the -review finding markers are dropped because that review is
  already counted via the Reviews API.
- Bot author matching now accepts both "docker-agent" and
  "docker-agent[bot]".

Document the deliberate scope of the gate: it guards the review run only.
The conversational reply jobs (reply-to-feedback, reply-to-mention) stay
unthrottled (org-gated, per-PR serialized, and trigger-bound to a human
comment), though their replies still feed the window count. Update
SECURITY.md and the reusable-workflow README to match.
---
 .github/workflows/review-pr.yml             |  28 +++-
 SECURITY.md                                 |  14 +-
 review-pr/README.md                         |   2 +-
 src/rate-limit/__tests__/rate-limit.test.ts | 146 +++++++++++++++-----
 src/rate-limit/index.ts                     | 104 ++++++++++----
 5 files changed, 223 insertions(+), 71 deletions(-)

diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml
index 6a55ed2..ebaae12 100644
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -329,12 +329,18 @@ jobs:
           REQUESTER: ${{ github.event.sender.login }}
         run: node "$DOCKER_AGENT_ACTION_ROOT/dist/check-org-membership.js"
 
-      # Rate-anomaly safeguard: count how many docker-agent review/reply comments
-      # were posted on this PR in the recent window. An authorized account can
-      # still drive the bot at high frequency (each request costs an LLM run), so
-      # a burst above the threshold is flagged and the expensive review is skipped.
-      # Runs BEFORE "Create check run" so a throttled request creates no check run
-      # (a skipped review must not surface as a green "PR Review" check).
+      # Rate-anomaly safeguard: count how many docker-agent reviews and replies
+      # landed on this PR in the recent window (full reviews via the Reviews API
+      # plus marker-bearing reply comments). An authorized account can still drive
+      # the bot at high frequency (each request costs an LLM run), so a burst above
+      # the threshold is flagged and the expensive review is skipped. Runs BEFORE
+      # "Create check run" so a throttled request creates no check run (a skipped
+      # review must not surface as a green "PR Review" check).
+      #
+      # Scope: this gate guards the review run only. The conversational reply jobs
+      # (reply-to-feedback, reply-to-mention) are intentionally left unthrottled
+      # (see the note on each). The window count itself already spans reviews AND
+      # replies, so heavy reply traffic still throttles subsequent review runs.
       - name: Check rate anomaly
         id: rate
         if: |
@@ -440,6 +446,12 @@ jobs:
               core.warning(`Failed to update check run: ${error.message}`);
             }
 
+  # No rate-anomaly gate here (unlike the review job): conversational replies are
+  # gated by org membership and serialized per-PR by the `concurrency:` group, and
+  # each reply requires a distinct human comment to trigger it, so they cannot be
+  # fanned out the way review requests can. The rate-limit window count does still
+  # include these replies, so a reply burst throttles subsequent review runs.
+  # Extending the same gate to the reply jobs is a deferred follow-up.
   reply-to-feedback:
     needs: [resolve-context]
     if: |
@@ -782,6 +794,10 @@ jobs:
           path: feedback/
           retention-days: 90
 
+  # No rate-anomaly gate here, same rationale as reply-to-feedback above:
+  # org-gated, per-PR-serialized, and trigger-bound to a human comment. The
+  # rate-limit window count includes these replies, so they still feed the
+  # review job's throttle. Gating the reply jobs themselves is a deferred follow-up.
   reply-to-mention:
     needs: [resolve-context]
     if: |
diff --git a/SECURITY.md b/SECURITY.md
index f67c940..0580771 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -107,10 +107,16 @@ bound request frequency:
   collapse same-trigger bursts (e.g. rapid force-pushes, repeated review
   requests) instead of running them in parallel. Groups are scoped per trigger
   intent so a quick conversational reply is never queued behind a long review.
-- **`src/rate-limit`** counts docker-agent review/reply comments posted on the PR
-  within a sliding window (default 600 s) and, when the count crosses a threshold
-  (default 8), flags a rate anomaly. The review job skips the expensive review.
-  The check **fails open** so an API error never blocks a legitimate review.
+- **`src/rate-limit`** counts docker-agent review/reply outputs on the PR within
+  a sliding window (default 600 s) and, when the count crosses a threshold
+  (default 8), flags a rate anomaly. It counts one unit per LLM run: full reviews
+  via the Reviews API (`pulls.listReviews`, by bot author, covering findings,
+  zero-finding APPROVEs, and timeout/error/LGTM fallbacks, none of which carry an
+  inline marker) plus marker-bearing reply comments. The review job skips the
+  expensive review on a flagged anomaly; the conversational reply jobs are not
+  gated (they are org-gated and per-PR serialized), though their replies still
+  count toward the window. The check **fails open** so an API error never blocks a
+  legitimate review.
 - The existing in-action **cache lock** (`pr-review-lock-<repo>-<pr>-*`, 600 s
   TTL) prevents concurrent reviews from racing on the same PR.
 
diff --git a/review-pr/README.md b/review-pr/README.md
index e572b5c..8ae7391 100644
--- a/review-pr/README.md
+++ b/review-pr/README.md
@@ -205,7 +205,7 @@ The workflow YAML examples above are the complete, recommended setup. The reusab
 | **PR vs issue comment** | The reusable workflow checks `github.event.issue.pull_request` internally. Plain issue comments on non-PR issues are silently ignored. |
 | **Draft PR skipping** | Draft PRs are skipped internally — no caller condition needed. |
 | **Concurrent review guard** | A cache-based lock (`pr-review-lock-<repo>-<pr>-*`) prevents duplicate reviews from racing on the same PR. |
-| **Rate-anomaly throttling** | Per-PR `concurrency:` groups serialize same-trigger bursts, and a rate-limit check skips the review when too many docker-agent review/reply comments land on one PR within the window. No caller configuration needed. |
+| **Rate-anomaly throttling** | Per-PR `concurrency:` groups serialize same-trigger bursts, and a rate-limit check skips the review when too many docker-agent reviews and replies land on one PR within the window. No caller configuration needed. |
 
 **The only decision callers make** is which setup pattern to use: 1-workflow for same-repo PRs, 2-workflow for repos that accept fork PRs. That distinction is the caller's responsibility because it controls which event path delivers OIDC credentials to the reusable workflow.
 
diff --git a/src/rate-limit/__tests__/rate-limit.test.ts b/src/rate-limit/__tests__/rate-limit.test.ts
index 0bd9787..2f37957 100644
--- a/src/rate-limit/__tests__/rate-limit.test.ts
+++ b/src/rate-limit/__tests__/rate-limit.test.ts
@@ -5,20 +5,28 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 vi.mock('@actions/core');
 
-const { mockPaginate, mockListComments, mockListReviewComments, MockOctokit } = vi.hoisted(() => {
-  const mockListComments = { endpoint: 'issues.listComments' };
-  const mockListReviewComments = { endpoint: 'pulls.listReviewComments' };
-  const mockPaginate = vi.fn();
-
-  class MockOctokit {
-    paginate = mockPaginate;
-    rest = {
-      issues: { listComments: mockListComments },
-      pulls: { listReviewComments: mockListReviewComments },
+const { mockPaginate, mockListComments, mockListReviewComments, mockListReviews, MockOctokit } =
+  vi.hoisted(() => {
+    const mockListComments = { endpoint: 'issues.listComments' };
+    const mockListReviewComments = { endpoint: 'pulls.listReviewComments' };
+    const mockListReviews = { endpoint: 'pulls.listReviews' };
+    const mockPaginate = vi.fn();
+
+    class MockOctokit {
+      paginate = mockPaginate;
+      rest = {
+        issues: { listComments: mockListComments },
+        pulls: { listReviewComments: mockListReviewComments, listReviews: mockListReviews },
+      };
+    }
+    return {
+      mockPaginate,
+      mockListComments,
+      mockListReviewComments,
+      mockListReviews,
+      MockOctokit,
     };
-  }
-  return { mockPaginate, mockListComments, mockListReviewComments, MockOctokit };
-});
+  });
 
 vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
 
@@ -31,14 +39,23 @@ const BOT = 'docker-agent';
 const REVIEW_MARKER = '<!-- docker-agent-review -->';
 const REPLY_MARKER = '<!-- docker-agent-review-reply -->';
 
-function agentComment(secAgo: number, marker = REVIEW_MARKER) {
-  return { user: { login: BOT }, body: `Review body ${marker}`, created_at: within(secAgo) };
+// A conversational reply (issue comment or inline review-comment reply): one per
+// reply LLM run, identified by the reply marker.
+function reply(secAgo: number, marker = REPLY_MARKER, login = BOT) {
+  return { user: { login }, body: `Reply ${marker}`, created_at: within(secAgo) };
+}
+
+// A full review posted via the Reviews API: one per review LLM run, identified by
+// bot author plus a non-empty assessment/status body (no inline marker).
+function review(secAgo: number, body = '### Assessment: 🟢 APPROVE', login = BOT) {
+  return { user: { login }, body, submitted_at: within(secAgo) };
 }
 
 // Route paginate() to the right dataset based on which endpoint it was given.
-function routePaginate(issue: unknown[], review: unknown[]) {
+function routePaginate(issue: unknown[], reviewComments: unknown[], reviews: unknown[]) {
   mockPaginate.mockImplementation((endpoint: unknown) => {
-    if (endpoint === mockListReviewComments) return Promise.resolve(review);
+    if (endpoint === mockListReviewComments) return Promise.resolve(reviewComments);
+    if (endpoint === mockListReviews) return Promise.resolve(reviews);
     return Promise.resolve(issue);
   });
 }
@@ -58,10 +75,11 @@ describe('detectRateAnomaly', () => {
     nowMs: NOW,
   };
 
-  it('counts agent review + reply comments within the window across both comment types', async () => {
+  it('counts full reviews plus reply comments within the window', async () => {
     routePaginate(
-      [agentComment(60), agentComment(120, REPLY_MARKER)],
-      [agentComment(30), agentComment(90)],
+      [reply(120)], // top-level reply (issue comment)
+      [reply(60)], // inline review-comment reply
+      [review(30), review(90)], // two full review runs
     );
 
     const r = await detectRateAnomaly('tok', base);
@@ -71,44 +89,100 @@ describe('detectRateAnomaly', () => {
     expect(r.threshold).toBe(3);
   });
 
+  it('counts a zero-finding APPROVE review (no marker, non-empty body)', async () => {
+    // Regression: review bodies have no inline marker and zero-finding reviews
+    // post no inline comments, so this is invisible to the comment endpoints.
+    routePaginate([], [], [review(30, '### Assessment: 🟢 APPROVE')]);
+    const r = await detectRateAnomaly('tok', { ...base, threshold: 1 });
+    expect(r.count).toBe(1);
+    expect(r.anomalous).toBe(true);
+  });
+
+  it('counts timeout / error / LGTM fallback reviews (no marker)', async () => {
+    routePaginate(
+      [],
+      [],
+      [
+        review(30, '⏱️ **PR Review Timed Out** — …'),
+        review(60, '❌ **PR Review Failed** — …'),
+        review(90, '🟢 **No issues found** — LGTM!'),
+      ],
+    );
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(3);
+    expect(r.anomalous).toBe(true);
+  });
+
   it('is not anomalous below the threshold', async () => {
-    routePaginate([agentComment(60)], [agentComment(30)]);
+    routePaginate([reply(120)], [], [review(30)]);
     const r = await detectRateAnomaly('tok', base);
     expect(r.count).toBe(2);
     expect(r.anomalous).toBe(false);
   });
 
-  it('ignores comments outside the window (created before windowStart)', async () => {
+  it('does not double-count a review by also counting its inline finding comments', async () => {
+    // A findings review is one run: the review object (counted) carries N inline
+    // comments with the review marker (NOT counted — they are part of that run).
     routePaginate(
-      [agentComment(60), agentComment(2000 /* 33min ago, outside 600s */)],
-      [agentComment(30)],
+      [],
+      [reply(40, REVIEW_MARKER), reply(40, REVIEW_MARKER), reply(40, REVIEW_MARKER)],
+      [review(40, '### Assessment: 🟡 NEEDS ATTENTION')],
     );
+    const r = await detectRateAnomaly('tok', { ...base, threshold: 1 });
+    expect(r.count).toBe(1);
+  });
+
+  it('ignores empty-body review entries (standalone inline comments/replies)', async () => {
+    // Inline comments/replies surface in listReviews as empty-body review entries;
+    // they must not be counted as review runs.
+    routePaginate([], [], [review(30, ''), review(60, '   ')]);
     const r = await detectRateAnomaly('tok', base);
-    expect(r.count).toBe(2);
+    expect(r.count).toBe(0);
+    expect(r.anomalous).toBe(false);
+  });
+
+  it('ignores reviews and replies outside the window', async () => {
+    routePaginate([reply(2000 /* 33min ago */)], [], [review(30), review(2000 /* outside 600s */)]);
+    const r = await detectRateAnomaly('tok', base);
+    expect(r.count).toBe(1);
   });
 
-  it('ignores comments from other users', async () => {
+  it('ignores reviews and comments from other users', async () => {
     routePaginate(
-      [{ user: { login: 'mallory' }, body: `spam ${REVIEW_MARKER}`, created_at: within(10) }],
+      [{ user: { login: 'mallory' }, body: `spam ${REPLY_MARKER}`, created_at: within(10) }],
       [],
+      [review(20, '### Assessment: 🟢 APPROVE', 'mallory')],
     );
     const r = await detectRateAnomaly('tok', base);
     expect(r.count).toBe(0);
     expect(r.anomalous).toBe(false);
   });
 
-  it('ignores agent comments that lack a review marker (e.g. ordinary chatter)', async () => {
+  it('matches the docker-agent[bot] App-token identity', async () => {
+    routePaginate(
+      [reply(60, REPLY_MARKER, 'docker-agent[bot]')],
+      [],
+      [review(30, '### Assessment: 🟢 APPROVE', 'docker-agent[bot]')],
+    );
+    const r = await detectRateAnomaly('tok', { ...base, threshold: 2 });
+    expect(r.count).toBe(2);
+    expect(r.anomalous).toBe(true);
+  });
+
+  it('ignores agent reply comments that lack a reply marker (ordinary chatter)', async () => {
     routePaginate(
       [{ user: { login: BOT }, body: 'just a plain comment', created_at: within(10) }],
       [],
+      [],
     );
     const r = await detectRateAnomaly('tok', base);
     expect(r.count).toBe(0);
   });
 
-  it('counts legacy cagent markers during the migration window', async () => {
+  it('counts the legacy cagent reply marker during the migration window', async () => {
     routePaginate(
-      [{ user: { login: BOT }, body: 'old <!-- cagent-review -->', created_at: within(10) }],
+      [{ user: { login: BOT }, body: 'old <!-- cagent-review-reply -->', created_at: within(10) }],
+      [],
       [],
     );
     const r = await detectRateAnomaly('tok', { ...base, threshold: 1 });
@@ -116,8 +190,8 @@ describe('detectRateAnomaly', () => {
     expect(r.anomalous).toBe(true);
   });
 
-  it('passes a since timestamp derived from the window to the API', async () => {
-    routePaginate([], []);
+  it('passes a since timestamp to the comment endpoints', async () => {
+    routePaginate([], [], []);
     await detectRateAnomaly('tok', base);
     const issueCall = mockPaginate.mock.calls.find((c) => c[0] === mockListComments);
     expect(issueCall?.[1]).toMatchObject({
@@ -127,4 +201,12 @@ describe('detectRateAnomaly', () => {
       since: new Date(NOW - 600 * 1000).toISOString(),
     });
   });
+
+  it('queries listReviews without a since parameter (the API has none)', async () => {
+    routePaginate([], [], []);
+    await detectRateAnomaly('tok', base);
+    const reviewCall = mockPaginate.mock.calls.find((c) => c[0] === mockListReviews);
+    expect(reviewCall?.[1]).toMatchObject({ owner: 'docker', repo: 'repo', pull_number: 5 });
+    expect(reviewCall?.[1]).not.toHaveProperty('since');
+  });
 });
diff --git a/src/rate-limit/index.ts b/src/rate-limit/index.ts
index 36ebdb7..a039da7 100644
--- a/src/rate-limit/index.ts
+++ b/src/rate-limit/index.ts
@@ -8,14 +8,23 @@
  * The existing per-PR cache lock (review-pr/action.yml) only stops *concurrent*
  * reviews and only on the review path; an authorized account can still drive the
  * bot at high frequency (each request costs an LLM run). This module adds a
- * frequency check: it counts how many docker-agent review/reply comments were
- * posted on the PR within a recent time window and flags the request as a rate
- * anomaly when that count crosses a threshold. The workflow gates the expensive
- * review step on the result, so a burst is throttled rather than run N times.
+ * frequency check: it counts how many docker-agent review/reply outputs landed
+ * on the PR within a recent time window and flags the request as a rate anomaly
+ * when that count crosses a threshold. The workflow gates the expensive review
+ * step on the result, so a burst is throttled rather than run N times.
  *
- * Counting the bot's own marker comments (rather than raw triggers) is the
- * signal that is reliably observable with the repo-scoped token already present,
- * and it directly measures how hard the bot is being driven on that PR.
+ * Counting is per LLM run, so each run contributes exactly one unit:
+ *   - Reviews are posted via the Reviews API (POST /pulls/{n}/reviews) with no
+ *     inline marker — a findings review, a zero-finding APPROVE, and the
+ *     timeout/error/LGTM fallbacks all land there. They are counted from
+ *     `pulls.listReviews` by bot author (a real review run always carries an
+ *     assessment/status body); the inline finding comments such a review carries
+ *     are deliberately not counted, since that would be N units per single run.
+ *   - Replies are posted as issue comments or inline review-comment replies,
+ *     each carrying a `-reply` marker, and are counted from the comment
+ *     endpoints (one marker per reply run).
+ * Both signals are observable with the repo-scoped token already present and
+ * together measure how hard the bot is being driven on that PR.
  *
  * Exported:
  *   detectRateAnomaly(token, opts) → RateAnomalyResult
@@ -32,15 +41,21 @@
 import * as core from '@actions/core';
 import { Octokit } from '@octokit/rest';
 
-// Review markers also matched by review-pr.yml / review-pr/action.yml. Counting
-// any of these captures both full reviews and conversational replies, and the
-// legacy cagent-* markers keep older threads countable during migration.
-const REVIEW_MARKERS = [
-  '<!-- docker-agent-review -->',
-  '<!-- docker-agent-review-reply -->',
-  '<!-- cagent-review -->',
-  '<!-- cagent-review-reply -->',
-];
+// Reply markers identify the bot's conversational replies — one per reply LLM
+// run — posted as issue comments or inline review-comment replies. Full reviews
+// carry no marker on the review body and are counted separately (see
+// detectRateAnomaly), so the review/finding markers are intentionally absent
+// here: counting them would double-count a review run that already shows up via
+// the Reviews API. The legacy cagent-* marker keeps older reply threads
+// countable during migration.
+const REPLY_MARKERS = ['<!-- docker-agent-review-reply -->', '<!-- cagent-review-reply -->'];
+
+// GitHub presents the bot identity as "docker-agent" when posting with a machine
+// user token, or "docker-agent[bot]" through a GitHub App installation token.
+// Match both so the count is correct regardless of which token posted.
+function matchesBotLogin(login: string | null | undefined, botLogin: string): boolean {
+  return login === botLogin || login === `${botLogin}[bot]`;
+}
 
 export interface RateAnomalyOptions {
   owner: string;
@@ -69,19 +84,39 @@ interface CommentLike {
   created_at?: string;
 }
 
-function isAgentReviewComment(c: CommentLike, botLogin: string, windowStartMs: number): boolean {
-  if (c.user?.login !== botLogin) return false;
+interface ReviewLike {
+  user?: { login?: string } | null;
+  body?: string | null;
+  submitted_at?: string | null;
+}
+
+function isAgentReplyComment(c: CommentLike, botLogin: string, windowStartMs: number): boolean {
+  if (!matchesBotLogin(c.user?.login, botLogin)) return false;
   const body = c.body ?? '';
-  if (!REVIEW_MARKERS.some((m) => body.includes(m))) return false;
+  if (!REPLY_MARKERS.some((m) => body.includes(m))) return false;
   if (!c.created_at) return false;
   const created = Date.parse(c.created_at);
   return Number.isFinite(created) && created >= windowStartMs;
 }
 
+function isAgentReview(r: ReviewLike, botLogin: string, windowStartMs: number): boolean {
+  if (!matchesBotLogin(r.user?.login, botLogin)) return false;
+  // A real review run always carries an assessment/status body ("### Assessment:
+  // …", or a timeout/error/LGTM fallback). Standalone inline comments and replies
+  // surface in this endpoint as empty-body review entries; skipping them keeps
+  // each review run counted exactly once and avoids double-counting an inline
+  // reply (already counted via its reply marker on the comment endpoints).
+  if (!r.body || r.body.trim().length === 0) return false;
+  if (!r.submitted_at) return false;
+  const submitted = Date.parse(r.submitted_at);
+  return Number.isFinite(submitted) && submitted >= windowStartMs;
+}
+
 /**
- * Count docker-agent review/reply comments on `prNumber` created within the last
- * `windowSeconds`, across both issue comments and inline review comments, and
- * decide whether the count constitutes a rate anomaly.
+ * Count docker-agent review/reply outputs on `prNumber` within the last
+ * `windowSeconds` — full reviews (via the Reviews API) plus conversational
+ * replies (issue comments and inline review-comment replies) — and decide
+ * whether the count constitutes a rate anomaly.
  */
 export async function detectRateAnomaly(
   token: string,
@@ -92,8 +127,9 @@ export async function detectRateAnomaly(
   const windowStartMs = now - opts.windowSeconds * 1000;
   const since = new Date(windowStartMs).toISOString();
 
-  // `since` filters server-side by updated_at; the per-comment created_at check
-  // below enforces the precise creation window. paginate() handles busy PRs.
+  // `since` filters the comment endpoints server-side by updated_at; the
+  // per-comment created_at check below enforces the precise window. paginate()
+  // handles busy PRs.
   const issueComments: CommentLike[] = await octokit.paginate(octokit.rest.issues.listComments, {
     owner: opts.owner,
     repo: opts.repo,
@@ -111,10 +147,22 @@ export async function detectRateAnomaly(
       per_page: 100,
     },
   );
+  // The Reviews API has no `since` parameter, so paginate and filter by
+  // submitted_at below. This is the only endpoint where a review run is
+  // observable: the bot posts every review (findings, zero-finding APPROVE, and
+  // the timeout/error/LGTM fallbacks) here with no inline marker on the body.
+  const reviews: ReviewLike[] = await octokit.paginate(octokit.rest.pulls.listReviews, {
+    owner: opts.owner,
+    repo: opts.repo,
+    pull_number: opts.prNumber,
+    per_page: 100,
+  });
 
-  const count = [...issueComments, ...reviewComments].filter((c) =>
-    isAgentReviewComment(c, opts.botLogin, windowStartMs),
+  const replyCount = [...issueComments, ...reviewComments].filter((c) =>
+    isAgentReplyComment(c, opts.botLogin, windowStartMs),
   ).length;
+  const reviewCount = reviews.filter((r) => isAgentReview(r, opts.botLogin, windowStartMs)).length;
+  const count = replyCount + reviewCount;
 
   return {
     count,
@@ -175,12 +223,12 @@ async function main(): Promise<void> {
 
     if (result.anomalous) {
       core.warning(
-        `Rate anomaly on PR #${prNumber}: ${result.count} agent review/reply comments in the ` +
+        `Rate anomaly on PR #${prNumber}: ${result.count} agent reviews/replies in the ` +
           `last ${windowSeconds}s (threshold ${threshold}). Throttling this request.`,
       );
     } else {
       core.info(
-        `✅ Rate OK on PR #${prNumber}: ${result.count}/${threshold} agent comments in ${windowSeconds}s`,
+        `✅ Rate OK on PR #${prNumber}: ${result.count}/${threshold} agent reviews/replies in ${windowSeconds}s`,
       );
     }
   } catch (err: unknown) {

From bf2774f91515e180ddd6720bcfee0113c2e24064 Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Fri, 26 Jun 2026 17:57:59 +0200
Subject: [PATCH 5/6] fix(rate-limit): address review comments from derekmisler

- add input guards to detectRateAnomaly for windowSeconds and threshold
  (NaN/Infinity/non-positive values now throw RangeError instead of
  propagating silently or permanently throttling all reviews)
- parallelize the three paginate calls with Promise.all
- replace paginate() with paginate.iterator + early break for listReviews
  to avoid unbounded API calls on heavily-reviewed PRs
- emit all four outputs (anomalous, count, window, threshold) in every
  fail-open path (missing token, invalid repo, API error catch block)
- export main() and add test coverage: input guard tests, and six
  main() tests covering env-var parsing, all fail-open paths, and
  the four-output invariant
- add github.run_id fallback to self-review-pr-trigger concurrency key
---
 .github/workflows/self-review-pr-trigger.yml |   2 +-
 src/rate-limit/__tests__/rate-limit.test.ts  | 175 ++++++++++++++++---
 src/rate-limit/index.ts                      |  75 +++++---
 3 files changed, 198 insertions(+), 54 deletions(-)

diff --git a/.github/workflows/self-review-pr-trigger.yml b/.github/workflows/self-review-pr-trigger.yml
index b2d411c..8a98a7f 100644
--- a/.github/workflows/self-review-pr-trigger.yml
+++ b/.github/workflows/self-review-pr-trigger.yml
@@ -14,7 +14,7 @@ on:
 # Distinct review comments stay independent (the comment id keys them into their
 # own group) so a real reply is never dropped by another comment on the same PR.
 concurrency:
-  group: pr-review-trigger-${{ github.event.pull_request.number }}-${{ github.event.comment.id || 'review-request' }}
+  group: pr-review-trigger-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.comment.id || 'review-request' }}
   cancel-in-progress: true
 
 permissions: {}
diff --git a/src/rate-limit/__tests__/rate-limit.test.ts b/src/rate-limit/__tests__/rate-limit.test.ts
index 2f37957..74694b1 100644
--- a/src/rate-limit/__tests__/rate-limit.test.ts
+++ b/src/rate-limit/__tests__/rate-limit.test.ts
@@ -1,36 +1,45 @@
 // Copyright The Docker Agent Action authors
 // SPDX-License-Identifier: Apache-2.0
 
+import * as core from '@actions/core';
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 vi.mock('@actions/core');
 
-const { mockPaginate, mockListComments, mockListReviewComments, mockListReviews, MockOctokit } =
-  vi.hoisted(() => {
-    const mockListComments = { endpoint: 'issues.listComments' };
-    const mockListReviewComments = { endpoint: 'pulls.listReviewComments' };
-    const mockListReviews = { endpoint: 'pulls.listReviews' };
-    const mockPaginate = vi.fn();
-
-    class MockOctokit {
-      paginate = mockPaginate;
-      rest = {
-        issues: { listComments: mockListComments },
-        pulls: { listReviewComments: mockListReviewComments, listReviews: mockListReviews },
-      };
-    }
-    return {
-      mockPaginate,
-      mockListComments,
-      mockListReviewComments,
-      mockListReviews,
-      MockOctokit,
+const {
+  mockPaginate,
+  mockPaginateIterator,
+  mockListComments,
+  mockListReviewComments,
+  mockListReviews,
+  MockOctokit,
+} = vi.hoisted(() => {
+  const mockListComments = { endpoint: 'issues.listComments' };
+  const mockListReviewComments = { endpoint: 'pulls.listReviewComments' };
+  const mockListReviews = { endpoint: 'pulls.listReviews' };
+  const mockPaginateIterator = vi.fn();
+  const mockPaginate = Object.assign(vi.fn(), { iterator: mockPaginateIterator });
+
+  class MockOctokit {
+    paginate = mockPaginate;
+    rest = {
+      issues: { listComments: mockListComments },
+      pulls: { listReviewComments: mockListReviewComments, listReviews: mockListReviews },
     };
-  });
+  }
+  return {
+    mockPaginate,
+    mockPaginateIterator,
+    mockListComments,
+    mockListReviewComments,
+    mockListReviews,
+    MockOctokit,
+  };
+});
 
 vi.mock('@octokit/rest', () => ({ Octokit: MockOctokit }));
 
-import { detectRateAnomaly } from '../index.js';
+import { detectRateAnomaly, main } from '../index.js';
 
 const NOW = Date.parse('2026-06-24T10:10:00.000Z');
 const within = (secAgo: number) => new Date(NOW - secAgo * 1000).toISOString();
@@ -51,13 +60,15 @@ function review(secAgo: number, body = '### Assessment: 🟢 APPROVE', login = B
   return { user: { login }, body, submitted_at: within(secAgo) };
 }
 
-// Route paginate() to the right dataset based on which endpoint it was given.
+// Route paginate() and paginate.iterator() to the right dataset based on endpoint.
 function routePaginate(issue: unknown[], reviewComments: unknown[], reviews: unknown[]) {
   mockPaginate.mockImplementation((endpoint: unknown) => {
     if (endpoint === mockListReviewComments) return Promise.resolve(reviewComments);
-    if (endpoint === mockListReviews) return Promise.resolve(reviews);
     return Promise.resolve(issue);
   });
+  mockPaginateIterator.mockImplementation(async function* () {
+    yield { data: reviews };
+  });
 }
 
 beforeEach(() => {
@@ -202,11 +213,119 @@ describe('detectRateAnomaly', () => {
     });
   });
 
-  it('queries listReviews without a since parameter (the API has none)', async () => {
+  it('queries listReviews via paginate.iterator without a since parameter (API has none)', async () => {
     routePaginate([], [], []);
     await detectRateAnomaly('tok', base);
-    const reviewCall = mockPaginate.mock.calls.find((c) => c[0] === mockListReviews);
-    expect(reviewCall?.[1]).toMatchObject({ owner: 'docker', repo: 'repo', pull_number: 5 });
-    expect(reviewCall?.[1]).not.toHaveProperty('since');
+    const iteratorCall = mockPaginateIterator.mock.calls.find((c) => c[0] === mockListReviews);
+    expect(iteratorCall?.[1]).toMatchObject({ owner: 'docker', repo: 'repo', pull_number: 5 });
+    expect(iteratorCall?.[1]).not.toHaveProperty('since');
+  });
+
+  it('throws RangeError for non-positive windowSeconds', async () => {
+    routePaginate([], [], []);
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: 0 })).rejects.toThrow(RangeError);
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: -1 })).rejects.toThrow(RangeError);
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: NaN })).rejects.toThrow(RangeError);
+  });
+
+  it('throws RangeError for non-positive threshold', async () => {
+    routePaginate([], [], []);
+    await expect(detectRateAnomaly('tok', { ...base, threshold: 0 })).rejects.toThrow(RangeError);
+    await expect(detectRateAnomaly('tok', { ...base, threshold: -1 })).rejects.toThrow(RangeError);
+  });
+});
+
+describe('main()', () => {
+  const mockedSetOutput = vi.mocked(core.setOutput);
+
+  beforeEach(() => {
+    vi.unstubAllEnvs();
+  });
+
+  it('fails open and emits all four outputs when token is missing', async () => {
+    vi.stubEnv('GITHUB_TOKEN', '');
+    vi.stubEnv('GH_TOKEN', '');
+    vi.stubEnv('RATE_PR_NUMBER', '5');
+    vi.stubEnv('RATE_WINDOW_SECONDS', '300');
+    vi.stubEnv('RATE_MAX_REQUESTS', '4');
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '0');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '300');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '4');
+  });
+
+  it('fails open and emits all four outputs when PR number is missing', async () => {
+    vi.stubEnv('GITHUB_TOKEN', 'tok');
+    vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo');
+    vi.stubEnv('RATE_PR_NUMBER', 'not-a-number');
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '0');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '600');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8');
+  });
+
+  it('fails open and emits all four outputs when GITHUB_REPOSITORY is invalid', async () => {
+    vi.stubEnv('GITHUB_TOKEN', 'tok');
+    vi.stubEnv('GITHUB_REPOSITORY', 'no-slash-here');
+    vi.stubEnv('RATE_PR_NUMBER', '5');
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '0');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '600');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8');
+  });
+
+  it('emits all four outputs on a successful non-anomalous run', async () => {
+    vi.stubEnv('GITHUB_TOKEN', 'tok');
+    vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo');
+    vi.stubEnv('RATE_PR_NUMBER', '5');
+    vi.stubEnv('RATE_WINDOW_SECONDS', '600');
+    vi.stubEnv('RATE_MAX_REQUESTS', '8');
+    routePaginate([], [], []);
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '0');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '600');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8');
+  });
+
+  it('emits all four outputs on a successful anomalous run', async () => {
+    vi.stubEnv('GITHUB_TOKEN', 'tok');
+    vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo');
+    vi.stubEnv('RATE_PR_NUMBER', '5');
+    vi.stubEnv('RATE_WINDOW_SECONDS', '600');
+    vi.stubEnv('RATE_MAX_REQUESTS', '2');
+    routePaginate([reply(60), reply(120)], [], []);
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'true');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '2');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '600');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '2');
+  });
+
+  it('fails open and emits all four outputs when the API throws', async () => {
+    vi.stubEnv('GITHUB_TOKEN', 'tok');
+    vi.stubEnv('GITHUB_REPOSITORY', 'docker/repo');
+    vi.stubEnv('RATE_PR_NUMBER', '5');
+    mockPaginate.mockRejectedValue(new Error('API error'));
+
+    await main();
+
+    expect(mockedSetOutput).toHaveBeenCalledWith('anomalous', 'false');
+    expect(mockedSetOutput).toHaveBeenCalledWith('count', '0');
+    expect(mockedSetOutput).toHaveBeenCalledWith('window', '600');
+    expect(mockedSetOutput).toHaveBeenCalledWith('threshold', '8');
   });
 });
diff --git a/src/rate-limit/index.ts b/src/rate-limit/index.ts
index a039da7..a9fbd79 100644
--- a/src/rate-limit/index.ts
+++ b/src/rate-limit/index.ts
@@ -122,41 +122,60 @@ export async function detectRateAnomaly(
   token: string,
   opts: RateAnomalyOptions,
 ): Promise<RateAnomalyResult> {
+  if (!Number.isFinite(opts.windowSeconds) || opts.windowSeconds <= 0) {
+    throw new RangeError(`windowSeconds must be a positive finite number, got ${opts.windowSeconds}`);
+  }
+  if (!Number.isFinite(opts.threshold) || opts.threshold <= 0) {
+    throw new RangeError(`threshold must be a positive integer, got ${opts.threshold}`);
+  }
+
   const octokit = new Octokit({ auth: token });
   const now = opts.nowMs ?? Date.now();
   const windowStartMs = now - opts.windowSeconds * 1000;
   const since = new Date(windowStartMs).toISOString();
 
   // `since` filters the comment endpoints server-side by updated_at; the
-  // per-comment created_at check below enforces the precise window. paginate()
-  // handles busy PRs.
-  const issueComments: CommentLike[] = await octokit.paginate(octokit.rest.issues.listComments, {
-    owner: opts.owner,
-    repo: opts.repo,
-    issue_number: opts.prNumber,
-    since,
-    per_page: 100,
-  });
-  const reviewComments: CommentLike[] = await octokit.paginate(
-    octokit.rest.pulls.listReviewComments,
-    {
+  // per-comment created_at check below enforces the precise window. All three
+  // fetches run in parallel to cut wall-clock time.
+  const [issueComments, reviewComments, reviews] = await Promise.all([
+    octokit.paginate(octokit.rest.issues.listComments, {
+      owner: opts.owner,
+      repo: opts.repo,
+      issue_number: opts.prNumber,
+      since,
+      per_page: 100,
+    }) as Promise<CommentLike[]>,
+    octokit.paginate(octokit.rest.pulls.listReviewComments, {
       owner: opts.owner,
       repo: opts.repo,
       pull_number: opts.prNumber,
       since,
       per_page: 100,
-    },
-  );
-  // The Reviews API has no `since` parameter, so paginate and filter by
-  // submitted_at below. This is the only endpoint where a review run is
-  // observable: the bot posts every review (findings, zero-finding APPROVE, and
-  // the timeout/error/LGTM fallbacks) here with no inline marker on the body.
-  const reviews: ReviewLike[] = await octokit.paginate(octokit.rest.pulls.listReviews, {
-    owner: opts.owner,
-    repo: opts.repo,
-    pull_number: opts.prNumber,
-    per_page: 100,
-  });
+    }) as Promise<CommentLike[]>,
+    // The Reviews API has no `since` parameter. Use paginate.iterator and stop
+    // early once the entire page predates the window (GitHub returns reviews
+    // oldest-first), avoiding unbounded API calls on heavily-reviewed PRs.
+    (async (): Promise<ReviewLike[]> => {
+      const acc: ReviewLike[] = [];
+      for await (const page of octokit.paginate.iterator(octokit.rest.pulls.listReviews, {
+        owner: opts.owner,
+        repo: opts.repo,
+        pull_number: opts.prNumber,
+        per_page: 100,
+      })) {
+        acc.push(...(page.data as ReviewLike[]));
+        if (
+          page.data.every(
+            (r) =>
+              !(r as ReviewLike).submitted_at ||
+              Date.parse((r as ReviewLike).submitted_at!) < windowStartMs,
+          )
+        )
+          break;
+      }
+      return acc;
+    })(),
+  ]);
 
   const replyCount = [...issueComments, ...reviewComments].filter((c) =>
     isAgentReplyComment(c, opts.botLogin, windowStartMs),
@@ -181,7 +200,7 @@ function parsePositiveInt(value: string | undefined, fallback: number): number {
   return Number.isInteger(n) && n > 0 ? n : fallback;
 }
 
-async function main(): Promise<void> {
+export async function main(): Promise<void> {
   const token = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? '';
   const repository = process.env.GITHUB_REPOSITORY ?? '';
   const prNumber = Number.parseInt(process.env.RATE_PR_NUMBER ?? '', 10);
@@ -194,6 +213,8 @@ async function main(): Promise<void> {
     core.warning('rate-limit: missing token or PR number — skipping rate check (fail-open)');
     core.setOutput('anomalous', 'false');
     core.setOutput('count', '0');
+    core.setOutput('window', String(windowSeconds));
+    core.setOutput('threshold', String(threshold));
     return;
   }
 
@@ -202,6 +223,8 @@ async function main(): Promise<void> {
     core.warning(`rate-limit: invalid GITHUB_REPOSITORY '${repository}' — skipping (fail-open)`);
     core.setOutput('anomalous', 'false');
     core.setOutput('count', '0');
+    core.setOutput('window', String(windowSeconds));
+    core.setOutput('threshold', String(threshold));
     return;
   }
   const owner = repository.slice(0, slashIdx);
@@ -238,6 +261,8 @@ async function main(): Promise<void> {
     );
     core.setOutput('anomalous', 'false');
     core.setOutput('count', '0');
+    core.setOutput('window', String(windowSeconds));
+    core.setOutput('threshold', String(threshold));
   }
 }
 

From 731b5221073559e7019cc40093fa5d269c2ac029 Mon Sep 17 00:00:00 2001
From: Sayt-0 <louis-dalmorocompta@docker.com>
Date: Fri, 26 Jun 2026 18:04:31 +0200
Subject: [PATCH 6/6] fix(rate-limit): satisfy lint and pin clock in main()
 tests

The Lint job failed on a forbidden non-null assertion in the
listReviews early-stop predicate and on biome formatting of the long
RangeError throw. Extract submitted_at into a local so the predicate
needs no '!', and let biome reflow the affected lines.

The Unit Tests job failed on the main() anomalous-run case: main()
reads the clock through Date.now() (no injectable nowMs), so the mocked
replies timestamped relative to the fixed NOW fell outside the window
against the real clock, yielding count 0. Pin the system clock to NOW
with fake timers in the main() describe so the fixtures stay inside the
sliding window.
---
 src/rate-limit/__tests__/rate-limit.test.ts | 23 +++++++++++++++++----
 src/rate-limit/index.ts                     | 17 +++++++--------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/rate-limit/__tests__/rate-limit.test.ts b/src/rate-limit/__tests__/rate-limit.test.ts
index 74694b1..320de36 100644
--- a/src/rate-limit/__tests__/rate-limit.test.ts
+++ b/src/rate-limit/__tests__/rate-limit.test.ts
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import * as core from '@actions/core';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 vi.mock('@actions/core');
 
@@ -223,9 +223,15 @@ describe('detectRateAnomaly', () => {
 
   it('throws RangeError for non-positive windowSeconds', async () => {
     routePaginate([], [], []);
-    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: 0 })).rejects.toThrow(RangeError);
-    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: -1 })).rejects.toThrow(RangeError);
-    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: NaN })).rejects.toThrow(RangeError);
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: 0 })).rejects.toThrow(
+      RangeError,
+    );
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: -1 })).rejects.toThrow(
+      RangeError,
+    );
+    await expect(detectRateAnomaly('tok', { ...base, windowSeconds: NaN })).rejects.toThrow(
+      RangeError,
+    );
   });
 
   it('throws RangeError for non-positive threshold', async () => {
@@ -238,8 +244,17 @@ describe('detectRateAnomaly', () => {
 describe('main()', () => {
   const mockedSetOutput = vi.mocked(core.setOutput);
 
+  // main() reads the clock via Date.now() (it cannot take an injected nowMs),
+  // so pin the system clock to NOW to keep the mocked comment timestamps inside
+  // the sliding window.
   beforeEach(() => {
     vi.unstubAllEnvs();
+    vi.useFakeTimers();
+    vi.setSystemTime(NOW);
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
   });
 
   it('fails open and emits all four outputs when token is missing', async () => {
diff --git a/src/rate-limit/index.ts b/src/rate-limit/index.ts
index a9fbd79..9b04153 100644
--- a/src/rate-limit/index.ts
+++ b/src/rate-limit/index.ts
@@ -123,7 +123,9 @@ export async function detectRateAnomaly(
   opts: RateAnomalyOptions,
 ): Promise<RateAnomalyResult> {
   if (!Number.isFinite(opts.windowSeconds) || opts.windowSeconds <= 0) {
-    throw new RangeError(`windowSeconds must be a positive finite number, got ${opts.windowSeconds}`);
+    throw new RangeError(
+      `windowSeconds must be a positive finite number, got ${opts.windowSeconds}`,
+    );
   }
   if (!Number.isFinite(opts.threshold) || opts.threshold <= 0) {
     throw new RangeError(`threshold must be a positive integer, got ${opts.threshold}`);
@@ -164,14 +166,11 @@ export async function detectRateAnomaly(
         per_page: 100,
       })) {
         acc.push(...(page.data as ReviewLike[]));
-        if (
-          page.data.every(
-            (r) =>
-              !(r as ReviewLike).submitted_at ||
-              Date.parse((r as ReviewLike).submitted_at!) < windowStartMs,
-          )
-        )
-          break;
+        const allBeforeWindow = page.data.every((r) => {
+          const submittedAt = (r as ReviewLike).submitted_at;
+          return !submittedAt || Date.parse(submittedAt) < windowStartMs;
+        });
+        if (allBeforeWindow) break;
       }
       return acc;
     })(),