From e903bf9014460e0a0c44e54146e08628ffb23e94 Mon Sep 17 00:00:00 2001 From: fullsend-code Date: Wed, 13 May 2026 12:28:10 +0000 Subject: [PATCH] feat(review-agent): add outcome labels to post-review.sh After posting the review verdict, post-review.sh now applies outcome labels based on the review action, completing the label state machine documented in the bugfix workflow guide: - approve (no protected-path downgrade) -> ready-for-merge - approve (downgraded to comment) -> requires-manual-review - comment (split/conflicting review) -> requires-manual-review - request_changes -> no label (fix agent triggers on the event) Labels are created if missing, matching the needs-human pattern in post-fix.sh. A DOWNGRADED flag tracks whether the protected-path check changed the action so the correct label is applied based on the final posted action rather than the original. Closes #552 Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Ben Alkov --- Makefile | 3 +- .../fullsend-repo/scripts/post-review-test.sh | 109 ++++++++++++++++++ .../fullsend-repo/scripts/post-review.sh | 40 ++++++- 3 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 internal/scaffold/fullsend-repo/scripts/post-review-test.sh diff --git a/Makefile b/Makefile index 66c41c923..5eb451db1 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ help: @echo " go-vet - Run go vet" @echo " go-tidy - Run go mod tidy" @echo " lint-md-links - Check markdown files for broken in-repo links and anchors" - @echo " script-test - Run shell script tests (post-triage, post-code, reconcile-repos, validate-output-schema)" + @echo " script-test - Run shell script tests (post-triage, post-code, post-review, reconcile-repos, validate-output-schema)" @echo " test - Run all checks: lint, go-vet, go-test, script-test" @echo " e2e-test - Run admin e2e tests (requires E2E_GITHUB_SESSION_FILE or E2E_GITHUB_USERNAME + E2E_GITHUB_PASSWORD)" @echo " e2e-export-session - Login to GitHub and export a Playwright session file" @@ -108,6 +108,7 @@ lint-md-links: script-test: bash internal/scaffold/fullsend-repo/scripts/post-triage-test.sh bash internal/scaffold/fullsend-repo/scripts/post-code-test.sh + bash internal/scaffold/fullsend-repo/scripts/post-review-test.sh bash internal/scaffold/fullsend-repo/scripts/reconcile-repos-test.sh bash internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh python3 internal/scaffold/fullsend-repo/scripts/process-fix-result-test.py diff --git a/internal/scaffold/fullsend-repo/scripts/post-review-test.sh b/internal/scaffold/fullsend-repo/scripts/post-review-test.sh new file mode 100644 index 000000000..7301542a2 --- /dev/null +++ b/internal/scaffold/fullsend-repo/scripts/post-review-test.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# post-review-test.sh — Test the outcome-label logic in post-review.sh. +# +# Extracts and tests the label-application logic in isolation using shell +# functions. This avoids needing a live GitHub API or fullsend CLI. +# +# Run from the repo root: +# bash internal/scaffold/fullsend-repo/scripts/post-review-test.sh + +set -euo pipefail + +FAILURES=0 + +# --------------------------------------------------------------------------- +# Test helper — reimplements the outcome-label logic from post-review.sh +# so we can test it without network access. +# +# Arguments: +# $1 — ACTION (the original action from agent-result.json) +# $2 — DOWNGRADED ("true" or "false") +# +# Prints the label that would be applied, or "none" if no label. +# --------------------------------------------------------------------------- +determine_outcome_label() { + local action="$1" + local downgraded="$2" + + if [ "${action}" = "approve" ] && [ "${downgraded}" = "false" ]; then + echo "ready-for-merge" + elif [ "${action}" = "approve" ] && [ "${downgraded}" = "true" ]; then + echo "requires-manual-review" + elif [ "${action}" = "comment" ]; then + echo "requires-manual-review" + elif [ "${action}" = "request_changes" ]; then + echo "none" + elif [ "${action}" = "reject" ]; then + echo "rejected" + else + echo "none" + fi +} + +run_test() { + local test_name="$1" + local action="$2" + local downgraded="$3" + local expected="$4" + + local actual + actual="$(determine_outcome_label "${action}" "${downgraded}")" + + if [ "${actual}" != "${expected}" ]; then + echo "FAIL: ${test_name}" + echo " action: '${action}'" + echo " downgraded: '${downgraded}'" + echo " expected: '${expected}'" + echo " actual: '${actual}'" + FAILURES=$((FAILURES + 1)) + return + fi + + echo "PASS: ${test_name}" +} + +# --- Test cases --- + +# Approve without protected-path downgrade → ready-for-merge +run_test "approve-no-downgrade" \ + "approve" "false" "ready-for-merge" + +# Approve with protected-path downgrade → requires-manual-review +run_test "approve-with-downgrade" \ + "approve" "true" "requires-manual-review" + +# Comment (split/conflicting review) → requires-manual-review +run_test "comment-split-review" \ + "comment" "false" "requires-manual-review" + +# request_changes → no outcome label +run_test "request-changes-no-label" \ + "request_changes" "false" "none" + +# reject → rejected +run_test "reject-label" \ + "reject" "false" "rejected" + +# Defensive: comment + downgraded=true can't occur in production (DOWNGRADED is +# only set inside the approve branch), but verify the label logic handles it. +run_test "comment-with-downgrade-flag" \ + "comment" "true" "requires-manual-review" + +# Edge cases: ensure unknown/empty actions produce no label +run_test "empty-action-no-label" \ + "" "false" "none" + +run_test "failure-action-no-label" \ + "failure" "false" "none" + +run_test "unknown-action-no-label" \ + "banana" "false" "none" + +# --- Summary --- + +echo "" +if [ "${FAILURES}" -gt 0 ]; then + echo "${FAILURES} test(s) failed" + exit 1 +fi +echo "All tests passed" diff --git a/internal/scaffold/fullsend-repo/scripts/post-review.sh b/internal/scaffold/fullsend-repo/scripts/post-review.sh index f7b79a169..dc5068bd4 100755 --- a/internal/scaffold/fullsend-repo/scripts/post-review.sh +++ b/internal/scaffold/fullsend-repo/scripts/post-review.sh @@ -53,6 +53,7 @@ fi echo "Using result: ${RESULT_FILE}" ACTION=$(jq -r '.action' "${RESULT_FILE}") +# ACTION retains the original value for the entire script — not re-read after protected-path downgrade. # --------------------------------------------------------------------------- # Protected-path check: the review agent must not approve PRs that touch @@ -73,6 +74,7 @@ REVIEW_PROTECTED_PATHS=( ".gitattributes" ) +DOWNGRADED=false if [ "${ACTION}" = "approve" ]; then PR_FILES=$(gh pr view "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" --json files --jq '.files[].path') if [ -z "${PR_FILES}" ]; then @@ -113,6 +115,7 @@ if [ "${ACTION}" = "approve" ]; then '.action = "comment" | .body = (.body + $notice)' \ "${RESULT_FILE}" > "${MODIFIED_RESULT}" RESULT_FILE="${MODIFIED_RESULT}" + DOWNGRADED=true fi fi @@ -122,14 +125,49 @@ fullsend post-review \ --token "${REVIEW_TOKEN}" \ --result "${RESULT_FILE}" -if [ "${ACTION}" = "reject" ]; then +# --------------------------------------------------------------------------- +# Outcome labels: apply labels based on the review action. +# Labels are created if missing, matching the needs-human pattern in +# post-fix.sh. +# Label logic is mirrored in post-review-test.sh — update both. +# --------------------------------------------------------------------------- + +# Remove stale outcome labels from prior runs before applying the new one. +# 2>/dev/null is intentional: unlike --add-label (where we want to see failures), +# removal of a non-existent label is the common case and not worth logging. +for stale_label in "ready-for-merge" "requires-manual-review" "rejected"; do + gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \ + --remove-label "${stale_label}" 2>/dev/null || true +done + +if [ "${ACTION}" = "approve" ] && [ "${DOWNGRADED}" = "false" ]; then + echo "Approve disposition — applying ready-for-merge label" + gh label create "ready-for-merge" --repo "${REPO_FULL_NAME}" \ + --description "All reviewers approved — ready to merge" --color "0E8A16" \ + 2>/dev/null || true + gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \ + --add-label "ready-for-merge" || true +elif { [ "${ACTION}" = "approve" ] && [ "${DOWNGRADED}" = "true" ]; } || \ + [ "${ACTION}" = "comment" ]; then + echo "Review requires human judgment — applying requires-manual-review label" + gh label create "requires-manual-review" --repo "${REPO_FULL_NAME}" \ + --description "Review requires human judgment" --color "FBCA04" \ + 2>/dev/null || true + gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \ + --add-label "requires-manual-review" || true +elif [ "${ACTION}" = "reject" ]; then echo "Reject disposition — closing PR and applying label" + gh label create "rejected" --repo "${REPO_FULL_NAME}" \ + --description "Approach rejected by review agent" --color "B60205" \ + 2>/dev/null || true gh pr close "${PR_NUMBER}" \ --repo "${REPO_FULL_NAME}" \ --comment "Closed by review agent: approach rejected." || true gh pr edit "${PR_NUMBER}" \ --repo "${REPO_FULL_NAME}" \ --add-label "rejected" || true +elif [ "${ACTION}" = "request_changes" ]; then + echo "Request-changes disposition — no outcome label (fix agent triggers on event)" fi echo "Review posted on ${REPO_FULL_NAME}#${PR_NUMBER}"