Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ help:
@echo " go-vet - Run go vet"
@echo " go-tidy - Run go mod tidy"
@echo " lint-md-links - Check markdown files for broken in-repo links and anchors"
@echo " script-test - Run shell script tests (post-triage, post-code, reconcile-repos, validate-output-schema)"
@echo " script-test - Run shell script tests (post-triage, post-code, post-review, reconcile-repos, validate-output-schema)"
@echo " test - Run all checks: lint, go-vet, go-test, script-test"
@echo " e2e-test - Run admin e2e tests (requires E2E_GITHUB_SESSION_FILE or E2E_GITHUB_USERNAME + E2E_GITHUB_PASSWORD)"
@echo " e2e-export-session - Login to GitHub and export a Playwright session file"
Expand Down Expand Up @@ -108,6 +108,7 @@ lint-md-links:
script-test:
bash internal/scaffold/fullsend-repo/scripts/post-triage-test.sh
bash internal/scaffold/fullsend-repo/scripts/post-code-test.sh
bash internal/scaffold/fullsend-repo/scripts/post-review-test.sh
bash internal/scaffold/fullsend-repo/scripts/reconcile-repos-test.sh
bash internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh
python3 internal/scaffold/fullsend-repo/scripts/process-fix-result-test.py
Expand Down
109 changes: 109 additions & 0 deletions internal/scaffold/fullsend-repo/scripts/post-review-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env bash
# post-review-test.sh — Test the outcome-label logic in post-review.sh.
#
# Extracts and tests the label-application logic in isolation using shell
# functions. This avoids needing a live GitHub API or fullsend CLI.
#
# Run from the repo root:
# bash internal/scaffold/fullsend-repo/scripts/post-review-test.sh

set -euo pipefail

FAILURES=0

# ---------------------------------------------------------------------------
# Test helper — reimplements the outcome-label logic from post-review.sh
# so we can test it without network access.
#
# Arguments:
# $1 — ACTION (the original action from agent-result.json)
# $2 — DOWNGRADED ("true" or "false")
#
# Prints the label that would be applied, or "none" if no label.
# ---------------------------------------------------------------------------
determine_outcome_label() {
local action="$1"
local downgraded="$2"

if [ "${action}" = "approve" ] && [ "${downgraded}" = "false" ]; then
echo "ready-for-merge"
elif [ "${action}" = "approve" ] && [ "${downgraded}" = "true" ]; then
echo "requires-manual-review"
elif [ "${action}" = "comment" ]; then
echo "requires-manual-review"
elif [ "${action}" = "request_changes" ]; then
echo "none"
elif [ "${action}" = "reject" ]; then
echo "rejected"
else
echo "none"
fi
}

run_test() {
local test_name="$1"
local action="$2"
local downgraded="$3"
local expected="$4"

local actual
actual="$(determine_outcome_label "${action}" "${downgraded}")"

if [ "${actual}" != "${expected}" ]; then
echo "FAIL: ${test_name}"
echo " action: '${action}'"
echo " downgraded: '${downgraded}'"
echo " expected: '${expected}'"
echo " actual: '${actual}'"
FAILURES=$((FAILURES + 1))
return
fi

echo "PASS: ${test_name}"
}

# --- Test cases ---

# Approve without protected-path downgrade → ready-for-merge
run_test "approve-no-downgrade" \
"approve" "false" "ready-for-merge"

# Approve with protected-path downgrade → requires-manual-review
run_test "approve-with-downgrade" \
"approve" "true" "requires-manual-review"

# Comment (split/conflicting review) → requires-manual-review
run_test "comment-split-review" \
"comment" "false" "requires-manual-review"

# request_changes → no outcome label
run_test "request-changes-no-label" \
"request_changes" "false" "none"

# reject → rejected
run_test "reject-label" \
"reject" "false" "rejected"

# Defensive: comment + downgraded=true can't occur in production (DOWNGRADED is
# only set inside the approve branch), but verify the label logic handles it.
run_test "comment-with-downgrade-flag" \
"comment" "true" "requires-manual-review"

# Edge cases: ensure unknown/empty actions produce no label
run_test "empty-action-no-label" \
"" "false" "none"

run_test "failure-action-no-label" \
"failure" "false" "none"

run_test "unknown-action-no-label" \
"banana" "false" "none"

# --- Summary ---

echo ""
if [ "${FAILURES}" -gt 0 ]; then
echo "${FAILURES} test(s) failed"
exit 1
fi
echo "All tests passed"
40 changes: 39 additions & 1 deletion internal/scaffold/fullsend-repo/scripts/post-review.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ fi
echo "Using result: ${RESULT_FILE}"

ACTION=$(jq -r '.action' "${RESULT_FILE}")
# ACTION retains the original value for the entire script — not re-read after protected-path downgrade.

# ---------------------------------------------------------------------------
# Protected-path check: the review agent must not approve PRs that touch
Expand All @@ -73,6 +74,7 @@ REVIEW_PROTECTED_PATHS=(
".gitattributes"
)

DOWNGRADED=false
if [ "${ACTION}" = "approve" ]; then
PR_FILES=$(gh pr view "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" --json files --jq '.files[].path')
if [ -z "${PR_FILES}" ]; then
Expand Down Expand Up @@ -113,6 +115,7 @@ if [ "${ACTION}" = "approve" ]; then
'.action = "comment" | .body = (.body + $notice)' \
"${RESULT_FILE}" > "${MODIFIED_RESULT}"
RESULT_FILE="${MODIFIED_RESULT}"
DOWNGRADED=true
fi
fi

Expand All @@ -122,14 +125,49 @@ fullsend post-review \
--token "${REVIEW_TOKEN}" \
--result "${RESULT_FILE}"

if [ "${ACTION}" = "reject" ]; then
# ---------------------------------------------------------------------------
# Outcome labels: apply labels based on the review action.
# Labels are created if missing, matching the needs-human pattern in
# post-fix.sh.
# Label logic is mirrored in post-review-test.sh — update both.
# ---------------------------------------------------------------------------

# Remove stale outcome labels from prior runs before applying the new one.
# 2>/dev/null is intentional: unlike --add-label (where we want to see failures),
# removal of a non-existent label is the common case and not worth logging.
for stale_label in "ready-for-merge" "requires-manual-review" "rejected"; do
gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \
--remove-label "${stale_label}" 2>/dev/null || true
done

if [ "${ACTION}" = "approve" ] && [ "${DOWNGRADED}" = "false" ]; then
echo "Approve disposition — applying ready-for-merge label"
gh label create "ready-for-merge" --repo "${REPO_FULL_NAME}" \
--description "All reviewers approved — ready to merge" --color "0E8A16" \
2>/dev/null || true
gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \
--add-label "ready-for-merge" || true
elif { [ "${ACTION}" = "approve" ] && [ "${DOWNGRADED}" = "true" ]; } || \
[ "${ACTION}" = "comment" ]; then
echo "Review requires human judgment — applying requires-manual-review label"
gh label create "requires-manual-review" --repo "${REPO_FULL_NAME}" \
--description "Review requires human judgment" --color "FBCA04" \
Comment thread
ben-alkov marked this conversation as resolved.
2>/dev/null || true
gh pr edit "${PR_NUMBER}" --repo "${REPO_FULL_NAME}" \
--add-label "requires-manual-review" || true
elif [ "${ACTION}" = "reject" ]; then
echo "Reject disposition — closing PR and applying label"
gh label create "rejected" --repo "${REPO_FULL_NAME}" \
--description "Approach rejected by review agent" --color "B60205" \
2>/dev/null || true
gh pr close "${PR_NUMBER}" \
--repo "${REPO_FULL_NAME}" \
--comment "Closed by review agent: approach rejected." || true
gh pr edit "${PR_NUMBER}" \
--repo "${REPO_FULL_NAME}" \
--add-label "rejected" || true
elif [ "${ACTION}" = "request_changes" ]; then
echo "Request-changes disposition — no outcome label (fix agent triggers on event)"
fi

echo "Review posted on ${REPO_FULL_NAME}#${PR_NUMBER}"
Loading