From 76764612a669721184cff2cd9d1b282c6336c6cc Mon Sep 17 00:00:00 2001 From: "rw-codebundle-agent[bot]" Date: Tue, 14 Apr 2026 10:48:52 +0000 Subject: [PATCH] Add vercel-project-http-error-health CodeBundle Monitors Vercel runtime logs for production deployments: validate API access, resolve latest READY production deployment, summarize 4xx/5xx rates vs thresholds, list top failing paths, and expose an SLI health score. Uses GET /v9/projects, /v6/deployments, and /v1/.../runtime-logs with bearer auth. Refs: runwhen-contrib/codecollection-registry#80 Made-with: Cursor --- .../vercel-project-http-error-health.yaml | 22 ++ .../vercel-project-http-error-health-sli.yaml | 52 ++++ .../vercel-project-http-error-health-slx.yaml | 29 ++ ...cel-project-http-error-health-taskset.yaml | 43 +++ .../.test/Taskfile.yaml | 51 ++++ .../.test/validate-bundle-structure.sh | 11 + .../README.md | 56 ++++ .../runbook.robot | 285 ++++++++++++++++++ .../sli.robot | 104 +++++++ .../vercel-analyze-common.sh | 53 ++++ .../vercel-lib.sh | 57 ++++ .../vercel-resolve-deployment.sh | 73 +++++ .../vercel-sli-health.sh | 63 ++++ .../vercel-summarize-4xx-rate.sh | 75 +++++ .../vercel-summarize-5xx-rate.sh | 69 +++++ .../vercel-top-paths-4xx.sh | 50 +++ .../vercel-top-paths-5xx.sh | 44 +++ .../vercel-validate-project.sh | 84 ++++++ 18 files changed, 1221 insertions(+) create mode 100644 codebundles/vercel-project-http-error-health/.runwhen/generation-rules/vercel-project-http-error-health.yaml create mode 100644 codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-sli.yaml create mode 100644 codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-slx.yaml create mode 100644 codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-taskset.yaml create mode 100644 codebundles/vercel-project-http-error-health/.test/Taskfile.yaml create mode 100755 
codebundles/vercel-project-http-error-health/.test/validate-bundle-structure.sh create mode 100644 codebundles/vercel-project-http-error-health/README.md create mode 100644 codebundles/vercel-project-http-error-health/runbook.robot create mode 100644 codebundles/vercel-project-http-error-health/sli.robot create mode 100755 codebundles/vercel-project-http-error-health/vercel-analyze-common.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-lib.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-resolve-deployment.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-sli-health.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-summarize-4xx-rate.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-summarize-5xx-rate.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-top-paths-4xx.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-top-paths-5xx.sh create mode 100755 codebundles/vercel-project-http-error-health/vercel-validate-project.sh diff --git a/codebundles/vercel-project-http-error-health/.runwhen/generation-rules/vercel-project-http-error-health.yaml b/codebundles/vercel-project-http-error-health/.runwhen/generation-rules/vercel-project-http-error-health.yaml new file mode 100644 index 00000000..d37655dd --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.runwhen/generation-rules/vercel-project-http-error-health.yaml @@ -0,0 +1,22 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: vercel + generationRules: + - resourceTypes: + - vercel_project + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: + - baseName: vercel-project-http-error-health + qualifiers: ["team", "project"] + baseTemplateName: vercel-project-http-error-health + levelOfDetail: basic + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: 
vercel-project-http-error-health-taskset.yaml diff --git a/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-sli.yaml b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-sli.yaml new file mode 100644 index 00000000..9e829e94 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-sli.yaml @@ -0,0 +1,52 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: Health Score + displayUnitsShort: score + locations: + - {{default_location}} + description: Aggregates binary 5xx and 4xx health from sampled Vercel runtime logs into a 0-1 score. + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/vercel-project-http-error-health/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 180 + configProvided: + - name: VERCEL_TEAM_ID + value: "{{ match_resource.resource.team_id }}" + - name: VERCEL_PROJECT + value: "{{ match_resource.resource.name }}" + - name: LOOKBACK_MINUTES + value: "60" + - name: ERROR_RATE_THRESHOLD_PCT + value: "1" + - name: MIN_ERROR_EVENTS + value: "5" + - name: EXCLUDE_404_FROM_4XX + value: "true" + secretsProvided: + {% if wb_version %} + {% include "vercel-auth.yaml" ignore missing %} + {% else %} + - name: vercel_api_token + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} + alertConfig: + tasks: + persona: eager-edgar + sessionTTL: 10m diff --git a/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-slx.yaml 
b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-slx.yaml new file mode 100644 index 00000000..3792d798 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-slx.yaml @@ -0,0 +1,29 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{ slx_name }} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/cloud/cloud.svg + alias: Vercel HTTP Error Health for {{ match_resource.resource.name }} + asMeasuredBy: Sampled runtime log error rates for 5xx and 4xx responses versus thresholds. + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{ workspace.owner_email }} + statement: Production HTTP 4xx/5xx rates from Vercel runtime logs should remain within configured thresholds. + additionalContext: + qualified_name: "{{ match_resource.qualified_name }}" + tags: + - name: cloud + value: vercel + - name: service + value: vercel_project + - name: scope + value: project + - name: access + value: read-only diff --git a/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-taskset.yaml b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-taskset.yaml new file mode 100644 index 00000000..63c7c9a5 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.runwhen/templates/vercel-project-http-error-health-taskset.yaml @@ -0,0 +1,43 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + description: Monitors Vercel runtime HTTP 4xx/5xx health for a project deployment using sampled logs. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/vercel-project-http-error-health/runbook.robot + configProvided: + - name: VERCEL_TEAM_ID + value: "{{ match_resource.resource.team_id }}" + - name: VERCEL_PROJECT + value: "{{ match_resource.resource.name }}" + - name: LOOKBACK_MINUTES + value: "60" + - name: ERROR_RATE_THRESHOLD_PCT + value: "1" + - name: MIN_ERROR_EVENTS + value: "5" + - name: EXCLUDE_404_FROM_4XX + value: "true" + secretsProvided: + {% if wb_version %} + {% include "vercel-auth.yaml" ignore missing %} + {% else %} + - name: vercel_api_token + workspaceKey: AUTH DETAILS NOT FOUND + {% endif %} diff --git a/codebundles/vercel-project-http-error-health/.test/Taskfile.yaml b/codebundles/vercel-project-http-error-health/.test/Taskfile.yaml new file mode 100644 index 00000000..f0a734ae --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.test/Taskfile.yaml @@ -0,0 +1,51 @@ +version: "3" + +tasks: + default: + desc: "Validate bundle structure" + cmds: + - task: validate-bundle-structure + + clean: + desc: "Remove generated workspaceInfo if present" + cmds: + - rm -f workspaceInfo.yaml + + build-infra: + desc: "No external infra required for Vercel API bundle; runs validation only" + cmds: + - task: validate-bundle-structure + + validate-bundle-structure: + desc: "Check runbook, SLI, and RunWhen templates are present" + cmds: + - bash ./validate-bundle-structure.sh + silent: true + + check-unpushed-commits: + desc: "Check for uncommitted changes outside .test" + vars: + BASE_DIR: "../" + cmds: + - | + UNCOMMITTED=$(git diff --name-only HEAD 2>/dev/null | grep -E "^${BASE_DIR}" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED" ]; then + echo "Uncommitted changes found. Commit before integration testing." 
+ exit 1 + fi + silent: true + + generate-rwl-config: + desc: "Stub — Vercel bundle uses API tokens; extend for RunWhen Local if needed" + cmds: + - 'echo "stub: add workspaceInfo.yaml for rwl when testing against a workspace"' + + run-rwl-discovery: + desc: "Stub — discovery requires workspace context" + cmds: + - 'echo "stub: run RunWhen Local discovery when workspace resources exist"' + + clean-rwl-discovery: + desc: "Remove discovery output" + cmds: + - rm -rf output diff --git a/codebundles/vercel-project-http-error-health/.test/validate-bundle-structure.sh b/codebundles/vercel-project-http-error-health/.test/validate-bundle-structure.sh new file mode 100755 index 00000000..6c7f0d1a --- /dev/null +++ b/codebundles/vercel-project-http-error-health/.test/validate-bundle-structure.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Sanity check that required CodeBundle files exist (local dev / CI helper). +set -euo pipefail +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +test -f "$ROOT/runbook.robot" +test -f "$ROOT/sli.robot" +test -f "$ROOT/.runwhen/generation-rules/vercel-project-http-error-health.yaml" +test -f "$ROOT/.runwhen/templates/vercel-project-http-error-health-slx.yaml" +test -f "$ROOT/.runwhen/templates/vercel-project-http-error-health-taskset.yaml" +test -f "$ROOT/.runwhen/templates/vercel-project-http-error-health-sli.yaml" +echo "vercel-project-http-error-health bundle structure OK" diff --git a/codebundles/vercel-project-http-error-health/README.md b/codebundles/vercel-project-http-error-health/README.md new file mode 100644 index 00000000..abe4b058 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/README.md @@ -0,0 +1,56 @@ +# Vercel Project HTTP Error Health + +This CodeBundle monitors frontend and edge/serverless request health on Vercel by sampling **runtime logs** for the latest **production** deployment. 
It reports 4xx and 5xx rates against configurable thresholds, optional 404 handling for 4xx analysis, and the top request paths driving failures. + +## Overview + +- **Access validation**: Confirms the API token can read the team scope and resolves the project by id or slug. +- **Deployment selection**: Uses the latest `READY` deployment with `target=production` as the log source (documented in the resolve task output). +- **Error rates**: Computes 5xx and (optionally non-404) 4xx rates from sampled request rows in `LOOKBACK_MINUTES`, compared to `ERROR_RATE_THRESHOLD_PCT` and `MIN_ERROR_EVENTS`. +- **Top paths**: Lists the most frequent failing paths for 5xx and for 4xx (with optional 404 exclusion). + +Runtime logs are retrieved via `GET /v1/projects/{projectId}/deployments/{deploymentId}/runtime-logs` (NDJSON). High-volume traffic may be **sampled** by line limits; rates are approximate and documented in task output. + +## Configuration + +### Required Variables + +- `VERCEL_TEAM_ID`: Vercel team id (`teamId` query parameter for API calls). +- `VERCEL_PROJECT`: Project id or slug to analyze. + +### Optional Variables + +- `LOOKBACK_MINUTES`: Log window ending at now (default: `60`). +- `ERROR_RATE_THRESHOLD_PCT`: Percent of sampled request rows that may be errors before raising a rate issue (default: `1`). +- `MIN_ERROR_EVENTS`: Minimum error count before treating a high rate as a high-severity signal (default: `5`). +- `EXCLUDE_404_FROM_4XX`: If `true`, HTTP 404 is excluded from 4xx summaries and top-path lists (default: `true`). + +### Secrets + +- `vercel_api_token`: Vercel bearer token with permission to read projects and deployment runtime logs (personal or team token). + +## Tasks Overview + +### Validate Vercel API Access and Resolve Project + +Calls `GET /v9/projects/{idOrName}` to verify credentials and resolve the project. Raises issues on HTTP 401/403/404 or other API failures. 
+ +### Resolve Production Deployment for Log Analysis + +Lists production deployments and picks the newest `READY` deployment. Raises an issue if none are available (for example preview-only projects). + +### Summarize 5xx Server Error Rate + +Counts HTTP 500–599 responses in sampled logs and compares the rate to `ERROR_RATE_THRESHOLD_PCT` and `MIN_ERROR_EVENTS`. + +### Summarize 4xx Client Error Rate (incl. 400) + +Counts HTTP 400–499 responses, optionally excluding 404 when `EXCLUDE_404_FROM_4XX` is true, using the same rate thresholds. + +### List Top Error Paths by 5xx Count + +Prints a ranked list of paths by 5xx volume for the lookback window (informational; issues only on resolution failures). + +### List Top Paths by 4xx (non-404) Count + +Same for 4xx, respecting `EXCLUDE_404_FROM_4XX`. diff --git a/codebundles/vercel-project-http-error-health/runbook.robot b/codebundles/vercel-project-http-error-health/runbook.robot new file mode 100644 index 00000000..2affe9c9 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/runbook.robot @@ -0,0 +1,285 @@ +*** Settings *** +Documentation Monitors Vercel runtime request logs for a production deployment to surface 4xx/5xx rates, threshold breaches, and top failing paths. +Metadata Author rw-codebundle-agent +Metadata Display Name Vercel Project HTTP Error Health +Metadata Supports Vercel vercel_project HTTP errors runtime logs +Force Tags Vercel vercel_project HTTP errors runtime_logs + +Library String +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + + +*** Tasks *** +Validate Vercel API Access and Resolve Project for `${VERCEL_PROJECT}` + [Documentation] Confirms the bearer token can read the team scope and resolves the project id or slug, failing fast with a clear issue when credentials or identifiers are wrong. + [Tags] Vercel vercel_project access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... 
bash_file=vercel-validate-project.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=VERCEL_TEAM_ID="${VERCEL_TEAM_ID}" VERCEL_PROJECT="${VERCEL_PROJECT}" ./vercel-validate-project.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat vercel_validate_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Vercel API should return HTTP 200 and project metadata for `${VERCEL_PROJECT}` under team `${VERCEL_TEAM_ID}` + ... actual=Vercel API rejected or could not resolve the project (see details) + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Validate Vercel project access:\n${result.stdout} + +Resolve Production Deployment for Log Analysis for Project `${VERCEL_PROJECT}` + [Documentation] Selects the latest READY production deployment used as the log source for the lookback window and documents deployment id and URL in the report. + [Tags] Vercel vercel_project deployment access:read-only data:config + + ${result}= RW.CLI.Run Bash File + ... bash_file=vercel-resolve-deployment.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=180 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=VERCEL_TEAM_ID="${VERCEL_TEAM_ID}" VERCEL_PROJECT="${VERCEL_PROJECT}" ./vercel-resolve-deployment.sh + + ${issues}= RW.CLI.Run Cli + ... 
cmd=cat vercel_resolve_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=A READY production deployment should exist for log analysis + ... actual=No suitable production deployment found or deployments API error + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Resolve production deployment:\n${result.stdout} + +Summarize 5xx Server Error Rate for Project `${VERCEL_PROJECT}` + [Documentation] Aggregates runtime logs for the deployment, counts HTTP 500-599 responses, compares the error rate to `${ERROR_RATE_THRESHOLD_PCT}` and minimum event count, and raises issues when thresholds are breached. + [Tags] Vercel vercel_project metrics 5xx access:read-only data:metrics + + ${result}= RW.CLI.Run Bash File + ... bash_file=vercel-summarize-5xx-rate.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=300 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./vercel-summarize-5xx-rate.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat vercel_5xx_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=5xx rate should stay below `${ERROR_RATE_THRESHOLD_PCT}` with sufficient volume to trust the signal + ... actual=5xx rate or volume indicates an unhealthy error rate for sampled runtime logs + ... title=${issue['title']} + ... 
reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report 5xx rate summary:\n${result.stdout} + +Summarize 4xx Client Error Rate (incl. 400) for Project `${VERCEL_PROJECT}` + [Documentation] Aggregates 4xx responses with optional exclusion of 404 when `${EXCLUDE_404_FROM_4XX}` is true, compares rates to thresholds, and highlights application client errors. + [Tags] Vercel vercel_project metrics 4xx access:read-only data:metrics + + ${result}= RW.CLI.Run Bash File + ... bash_file=vercel-summarize-4xx-rate.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=300 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./vercel-summarize-4xx-rate.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat vercel_4xx_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Non-404 4xx rate should remain below `${ERROR_RATE_THRESHOLD_PCT}` for sampled traffic + ... actual=4xx rate or volume suggests validation, auth, or routing problems in sampled logs + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report 4xx rate summary:\n${result.stdout} + +List Top Error Paths by 5xx Count for Project `${VERCEL_PROJECT}` + [Documentation] Ranks request paths by the volume of 5xx responses in the lookback window to show which routes or assets fail most often. + [Tags] Vercel vercel_project metrics paths access:read-only data:metrics + + ${result}= RW.CLI.Run Bash File + ... bash_file=vercel-top-paths-5xx.sh + ... env=${env} + ... 
secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=300 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./vercel-top-paths-5xx.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat vercel_top_5xx_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Top paths table should be available when deployment logs are reachable + ... actual=See issue details for path listing failures + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Top 5xx paths:\n${result.stdout} + +List Top Paths by 4xx (non-404) Count for Project `${VERCEL_PROJECT}` + [Documentation] Surfaces paths with the highest 4xx counts excluding 404 when configured, highlighting validation and auth issues distinct from missing pages. + [Tags] Vercel vercel_project metrics paths access:read-only data:metrics + + ${result}= RW.CLI.Run Bash File + ... bash_file=vercel-top-paths-4xx.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... timeout_seconds=300 + ... include_in_history=false + ... show_in_rwl_cheatsheet=true + ... cmd_override=./vercel-top-paths-4xx.sh + + ${issues}= RW.CLI.Run Cli + ... cmd=cat vercel_top_4xx_issues.json + TRY + ${issue_list}= Evaluate json.loads(r'''${issues.stdout}''') json + EXCEPT + Log Failed to parse JSON for task, defaulting to empty list. WARN + ${issue_list}= Create List + END + IF len(@{issue_list}) > 0 + FOR ${issue} IN @{issue_list} + RW.Core.Add Issue + ... severity=${issue['severity']} + ... expected=Top 4xx paths should be listed when logs are reachable + ... 
actual=See issue details for path listing failures + ... title=${issue['title']} + ... reproduce_hint=${result.cmd} + ... details=${issue['details']} + ... next_steps=${issue['next_steps']} + END + END + RW.Core.Add Pre To Report Top 4xx paths:\n${result.stdout} + + +*** Keywords *** +Suite Initialization + ${vercel_api_token}= RW.Core.Import Secret vercel_api_token + ... type=string + ... description=Vercel bearer token with read access to projects and deployment runtime logs. + ... pattern=\w* + + ${VERCEL_TEAM_ID}= RW.Core.Import User Variable VERCEL_TEAM_ID + ... type=string + ... description=Vercel teamId used to scope REST API calls. + ... pattern=\w* + + ${VERCEL_PROJECT}= RW.Core.Import User Variable VERCEL_PROJECT + ... type=string + ... description=Project id or slug to analyze. + ... pattern=\w* + + ${LOOKBACK_MINUTES}= RW.Core.Import User Variable LOOKBACK_MINUTES + ... type=string + ... description=Minutes of runtime logs to sample relative to now. + ... pattern=^\d+$ + ... default=60 + + ${ERROR_RATE_THRESHOLD_PCT}= RW.Core.Import User Variable ERROR_RATE_THRESHOLD_PCT + ... type=string + ... description=Issue when error rate exceeds this percent of sampled request rows. + ... pattern=^\d+(\.\d+)?$ + ... default=1 + + ${MIN_ERROR_EVENTS}= RW.Core.Import User Variable MIN_ERROR_EVENTS + ... type=string + ... description=Minimum error events before raising a high-severity rate issue. + ... pattern=^\d+$ + ... default=5 + + ${EXCLUDE_404_FROM_4XX}= RW.Core.Import User Variable EXCLUDE_404_FROM_4XX + ... type=string + ... description=If true, exclude HTTP 404 from 4xx summaries and top-path lists. + ... pattern=\w* + ... 
default=true + + Set Suite Variable ${vercel_api_token} ${vercel_api_token} + Set Suite Variable ${VERCEL_TEAM_ID} ${VERCEL_TEAM_ID} + Set Suite Variable ${VERCEL_PROJECT} ${VERCEL_PROJECT} + Set Suite Variable ${LOOKBACK_MINUTES} ${LOOKBACK_MINUTES} + Set Suite Variable ${ERROR_RATE_THRESHOLD_PCT} ${ERROR_RATE_THRESHOLD_PCT} + Set Suite Variable ${MIN_ERROR_EVENTS} ${MIN_ERROR_EVENTS} + Set Suite Variable ${EXCLUDE_404_FROM_4XX} ${EXCLUDE_404_FROM_4XX} + + ${env}= Create Dictionary + ... VERCEL_TEAM_ID=${VERCEL_TEAM_ID} + ... VERCEL_PROJECT=${VERCEL_PROJECT} + ... LOOKBACK_MINUTES=${LOOKBACK_MINUTES} + ... ERROR_RATE_THRESHOLD_PCT=${ERROR_RATE_THRESHOLD_PCT} + ... MIN_ERROR_EVENTS=${MIN_ERROR_EVENTS} + ... EXCLUDE_404_FROM_4XX=${EXCLUDE_404_FROM_4XX} + Set Suite Variable ${env} ${env} diff --git a/codebundles/vercel-project-http-error-health/sli.robot b/codebundles/vercel-project-http-error-health/sli.robot new file mode 100644 index 00000000..27d2d05c --- /dev/null +++ b/codebundles/vercel-project-http-error-health/sli.robot @@ -0,0 +1,104 @@ +*** Settings *** +Documentation Measures Vercel HTTP health for a project by scoring 5xx and 4xx error rates from sampled runtime logs. Produces a value between 0 (failing) and 1 (healthy). +Metadata Author rw-codebundle-agent +Metadata Display Name Vercel Project HTTP Error Health +Metadata Supports Vercel vercel_project HTTP SLI +Force Tags Vercel vercel_project HTTP SLI + +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + + +*** Tasks *** +Score Vercel 5xx and 4xx Health for Project `${VERCEL_PROJECT}` + [Documentation] Fetches a lightweight sample of runtime logs for the production deployment and emits binary sub-scores plus an aggregate mean between 0 and 1. + [Tags] Vercel access:read-only data:metrics + + ${snap}= RW.CLI.Run Bash File + ... bash_file=vercel-sli-health.sh + ... env=${env} + ... secret__VERCEL_API_TOKEN=${vercel_api_token} + ... 
timeout_seconds=60 + ... include_in_history=false + ... show_in_rwl_cheatsheet=false + ... cmd_override=./vercel-sli-health.sh + + TRY + ${m}= Evaluate json.loads(r'''${snap.stdout}''') json + ${s5}= Convert To Number ${m['score_5xx']} + ${s4}= Convert To Number ${m['score_4xx']} + ${agg}= Convert To Number ${m['aggregate']} + EXCEPT + Log SLI JSON parse failed; scoring 0. WARN + ${s5}= Convert To Number 0 + ${s4}= Convert To Number 0 + ${agg}= Convert To Number 0 + END + + RW.Core.Push Metric ${s5} sub_name=score_5xx + RW.Core.Push Metric ${s4} sub_name=score_4xx + RW.Core.Push Metric ${agg} + RW.Core.Add to Report Vercel HTTP SLI aggregate=${agg} (${snap.stdout}) + + +*** Keywords *** +Suite Initialization + ${vercel_api_token}= RW.Core.Import Secret vercel_api_token + ... type=string + ... description=Vercel bearer token with read access to projects and deployment runtime logs. + ... pattern=\w* + + ${VERCEL_TEAM_ID}= RW.Core.Import User Variable VERCEL_TEAM_ID + ... type=string + ... description=Vercel teamId used to scope REST API calls. + ... pattern=\w* + + ${VERCEL_PROJECT}= RW.Core.Import User Variable VERCEL_PROJECT + ... type=string + ... description=Project id or slug to analyze. + ... pattern=\w* + + ${LOOKBACK_MINUTES}= RW.Core.Import User Variable LOOKBACK_MINUTES + ... type=string + ... description=Minutes of runtime logs to sample relative to now. + ... pattern=^\d+$ + ... default=60 + + ${ERROR_RATE_THRESHOLD_PCT}= RW.Core.Import User Variable ERROR_RATE_THRESHOLD_PCT + ... type=string + ... description=Issue when error rate exceeds this percent of sampled request rows. + ... pattern=^\d+(\.\d+)?$ + ... default=1 + + ${MIN_ERROR_EVENTS}= RW.Core.Import User Variable MIN_ERROR_EVENTS + ... type=string + ... description=Minimum error events before treating a high rate as failing. + ... pattern=^\d+$ + ... default=5 + + ${EXCLUDE_404_FROM_4XX}= RW.Core.Import User Variable EXCLUDE_404_FROM_4XX + ... type=string + ... 
description=If true, exclude HTTP 404 from 4xx scoring. + ... pattern=\w* + ... default=true + + Set Suite Variable ${vercel_api_token} ${vercel_api_token} + Set Suite Variable ${VERCEL_TEAM_ID} ${VERCEL_TEAM_ID} + Set Suite Variable ${VERCEL_PROJECT} ${VERCEL_PROJECT} + Set Suite Variable ${LOOKBACK_MINUTES} ${LOOKBACK_MINUTES} + Set Suite Variable ${ERROR_RATE_THRESHOLD_PCT} ${ERROR_RATE_THRESHOLD_PCT} + Set Suite Variable ${MIN_ERROR_EVENTS} ${MIN_ERROR_EVENTS} + Set Suite Variable ${EXCLUDE_404_FROM_4XX} ${EXCLUDE_404_FROM_4XX} + + ${env}= Create Dictionary + ... VERCEL_TEAM_ID=${VERCEL_TEAM_ID} + ... VERCEL_PROJECT=${VERCEL_PROJECT} + ... LOOKBACK_MINUTES=${LOOKBACK_MINUTES} + ... ERROR_RATE_THRESHOLD_PCT=${ERROR_RATE_THRESHOLD_PCT} + ... MIN_ERROR_EVENTS=${MIN_ERROR_EVENTS} + ... EXCLUDE_404_FROM_4XX=${EXCLUDE_404_FROM_4XX} + Set Suite Variable ${env} ${env} diff --git a/codebundles/vercel-project-http-error-health/vercel-analyze-common.sh b/codebundles/vercel-project-http-error-health/vercel-analyze-common.sh new file mode 100755 index 00000000..460530f8 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-analyze-common.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1091 +# Shared analysis helpers (sourced from task scripts). + +vercel_compute_since_until_ms() { + local lookback="${LOOKBACK_MINUTES:-60}" + local now_ms + now_ms="$(python3 -c "import time; print(int(time.time()*1000))")" + local span_ms=$((lookback * 60 * 1000)) + SINCE_MS=$((now_ms - span_ms)) + UNTIL_MS=$now_ms +} + +# Sets VERCEL_PROJECT_ID, VERCEL_DEPLOYMENT_ID from API (requires vercel-lib.sh). 
+vercel_resolve_project_and_deployment_ids() {
+  local enc_proj enc_tid url raw http_code body dep
+  enc_proj="$(vercel_urlencode "${VERCEL_PROJECT}")"
+  enc_tid="$(vercel_urlencode "${VERCEL_TEAM_ID}")"
+  url="${VERCEL_API_BASE}/v9/projects/${enc_proj}?teamId=${enc_tid}"
+  raw="$(vercel_http_get "$url")" || return 1
+  http_code=$(printf '%s\n' "$raw" | tail -n1) # vercel_http_get prints the HTTP status as the final line
+  body=$(printf '%s\n' "$raw" | sed '$d') # everything before that final line is the response body
+  if [ "$http_code" != "200" ]; then
+    echo "resolve_error: project HTTP ${http_code}" >&2
+    return 1
+  fi
+  VERCEL_PROJECT_ID=$(printf '%s' "$body" | jq -er '.id // empty') || { echo "resolve_error: project id missing in API response" >&2; return 1; } # jq -e exits non-zero when no id is produced, so an empty projectId is never sent on
+  dep="$(vercel_latest_production_deployment_json "$VERCEL_PROJECT_ID")" || return 1
+  if [ -z "$dep" ] || [ "$dep" = "null" ]; then
+    echo "resolve_error: no production deployment" >&2
+    return 1
+  fi
+  VERCEL_DEPLOYMENT_ID=$(printf '%s' "$dep" | jq -er '.uid // empty') || { echo "resolve_error: deployment uid missing in API response" >&2; return 1; }
+}
+
+# Print a JSON array of the request rows in NDJSON file $1 whose timestampInMs is >= $2; rows with source "delimiter" or a non-numeric responseStatusCode are dropped.
+vercel_filter_request_logs_json() {
+  local file="$1" # runtime-log file, one JSON object per line
+  local since_ms="$2" # window start in epoch milliseconds (passed to jq as a JSON number)
+  if [ ! -s "$file" ]; then
+    echo "[]"
+    return 0
+  fi
+  # jq's `//` binds looser than `>=`, so the fallback must be parenthesized or the comparison never applies to real timestamps.
+  jq -n --argjson since "$since_ms" '
+    [ inputs
+      | select(type == "object")
+      | select(((.timestampInMs | tonumber?) // 0) >= $since)
+      | select((.source | tostring) != "delimiter")
+      | select((.responseStatusCode | type) == "number")
+    ]
+  ' "$file"
+}
diff --git a/codebundles/vercel-project-http-error-health/vercel-lib.sh b/codebundles/vercel-project-http-error-health/vercel-lib.sh
new file mode 100755
index 00000000..8a24a5b6
--- /dev/null
+++ b/codebundles/vercel-project-http-error-health/vercel-lib.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Shared helpers for Vercel HTTP error health checks.
# Base URL of the Vercel REST API; callers may override it through the environment.
VERCEL_API_BASE="${VERCEL_API_BASE:-https://api.vercel.com}"

#######################################
# Percent-encode a string for use in a URL path segment or query value.
# Every reserved character is encoded, including "/" (safe='').
# Arguments:
#   $1 - string to encode
# Outputs:
#   The encoded string on stdout.
#######################################
vercel_urlencode() {
  python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1], safe=''))" "$1"
}

#######################################
# Print the Authorization header line built from VERCEL_API_TOKEN.
# xtrace is switched off while the token is handled and restored afterwards,
# so a caller running under `set -x` does not write the token to its trace.
# Globals:
#   VERCEL_API_TOKEN (read)
# Outputs:
#   "Authorization: Bearer <token>" on stdout (no trailing newline);
#   "vercel_lib_error: ..." on stderr when the token is unset or empty.
# Returns:
#   0 on success, 1 when the token is not set.
#######################################
vercel_bearer_header() {
  local had_xtrace=0
  local rc=0
  case "$-" in
    *x*) had_xtrace=1 ;;
  esac
  # The redirect on the group hides the trace line of `set +x` itself.
  { set +x; } 2>/dev/null
  if [ -z "${VERCEL_API_TOKEN:-}" ]; then
    echo "vercel_lib_error: VERCEL_API_TOKEN is not set" >&2
    rc=1
  else
    printf '%s' "Authorization: Bearer ${VERCEL_API_TOKEN}"
  fi
  if [ "$had_xtrace" -eq 1 ]; then
    set -x
  fi
  return "$rc"
}

#######################################
# Authenticated GET against the Vercel API.
# Arguments:
#   $1 - full request URL
# Outputs:
#   The response body followed by a newline and the HTTP status code, so the
#   status is always the last line (callers split with `tail -n1` / `sed '$d'`).
# Returns:
#   1 when the token is missing, otherwise curl's exit status.
#######################################
vercel_http_get() {
  local url="$1"
  (
    # Subshell with xtrace off: under `set -x` both the hdr assignment and the
    # curl command line would otherwise print the bearer token to the trace.
    { set +x; } 2>/dev/null
    hdr="$(vercel_bearer_header)" || exit 1
    curl -sS --max-time 60 -H "$hdr" -H "Accept: application/json" -w "\n%{http_code}" "$url"
  )
}

#######################################
# Print the newest READY production deployment of a project as compact JSON.
# Lists up to 15 production deployments, keeps those whose readyState is
# "READY", and picks the one with the greatest createdAt (falling back to
# .created). Prints nothing when there is no such deployment.
# Globals:
#   VERCEL_TEAM_ID, VERCEL_API_BASE (read)
# Arguments:
#   $1 - project id
# Outputs:
#   One JSON object on stdout, or nothing; on a non-200 response a JSON
#   diagnostic {"error","httpStatus","body"} on stderr.
# Returns:
#   0 on success (including "none found"), 1 on request failure or non-200,
#   jq's status if the body is not valid JSON.
#######################################
vercel_latest_production_deployment_json() {
  local pid="$1"
  local enc_pid tid url raw
  enc_pid="$(vercel_urlencode "$pid")"
  tid="$(vercel_urlencode "${VERCEL_TEAM_ID}")"
  url="${VERCEL_API_BASE}/v6/deployments?projectId=${enc_pid}&teamId=${tid}&target=production&limit=15"
  raw="$(vercel_http_get "$url")" || return 1
  local code body
  code=$(echo "$raw" | tail -n1)
  body=$(echo "$raw" | sed '$d')
  if [ "$code" != "200" ]; then
    # Built with jq so the diagnostic stays valid JSON even when the status
    # line is empty or not numeric (it is then emitted as a string).
    jq -cn --arg code "$code" --arg body "$body" \
      '{error: "deployments_list_failed", httpStatus: ($code | (tonumber? // .)), body: $body}' >&2
    return 1
  fi
  # `// []` keeps a response without a "deployments" key from raising a jq
  # error; it is reported as "no deployment" instead.
  echo "$body" | jq -c '[(.deployments // [])[] | select(.readyState == "READY")] | sort_by(.createdAt // .created // "") | reverse | .[0] // empty'
}

# Download runtime logs (NDJSON lines) into file; truncates to max_lines.
+vercel_fetch_runtime_logs_file() { + local project_id="$1" + local deployment_id="$2" + local since_ms="$3" + local until_ms="$4" + local out_file="$5" + local max_lines="${6:-5000}" + local hdr enc_p enc_d tid url + hdr="$(vercel_bearer_header)" || return 1 + enc_p="$(vercel_urlencode "$project_id")" + enc_d="$(vercel_urlencode "$deployment_id")" + tid="$(vercel_urlencode "${VERCEL_TEAM_ID}")" + url="${VERCEL_API_BASE}/v1/projects/${enc_p}/deployments/${enc_d}/runtime-logs?teamId=${tid}&since=${since_ms}&until=${until_ms}&limit=${max_lines}" + curl -sS --max-time 120 -H "$hdr" -H "Accept: application/stream+json" "$url" | head -n "$max_lines" >"$out_file" +} diff --git a/codebundles/vercel-project-http-error-health/vercel-resolve-deployment.sh b/codebundles/vercel-project-http-error-health/vercel-resolve-deployment.sh new file mode 100755 index 00000000..40644fb5 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-resolve-deployment.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" + +OUTPUT_FILE="vercel_resolve_issues.json" +issues_json='[]' + +enc_proj="$(vercel_urlencode "${VERCEL_PROJECT}")" +enc_tid="$(vercel_urlencode "${VERCEL_TEAM_ID}")" +url="${VERCEL_API_BASE}/v9/projects/${enc_proj}?teamId=${enc_tid}" + +raw="$(vercel_http_get "$url")" || true +http_code=$(echo "$raw" | tail -n1) +body=$(echo "$raw" | sed '$d') + +if [ "$http_code" != "200" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Resolve Project Before Deployment Lookup for \`${VERCEL_PROJECT}\`" \ + --arg details "Project GET failed with HTTP ${http_code}. Run the validate task first." \ + --arg severity "3" \ + --arg next_steps "Fix project access (token, team id, project slug) then rerun." \ + '. 
+= [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 0 +fi + +project_id=$(echo "$body" | jq -r '.id // empty') +project_name=$(echo "$body" | jq -r '.name // empty') + +dep_json="$(vercel_latest_production_deployment_json "$project_id")" || dep_json="" + +if [ -z "$dep_json" ] || [ "$dep_json" = "null" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "No READY Production Deployment for Project \`${VERCEL_PROJECT}\`" \ + --arg details "Could not find a READY deployment with target=production. Edge-only or paused projects may have no runtime logs." \ + --arg severity "3" \ + --arg next_steps "Deploy to production or verify the project has a production deployment. Preview-only traffic will not appear in production log queries." \ + '. += [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') + echo "$issues_json" > "$OUTPUT_FILE" + echo "No production deployment found." + exit 0 +fi + +dep_uid=$(echo "$dep_json" | jq -r '.uid // empty') +dep_url=$(echo "$dep_json" | jq -r '.url // empty') +created=$(echo "$dep_json" | jq -r '.createdAt // empty') + +echo "Production deployment for analysis:" +echo " project_id=${project_id} (${project_name})" +echo " deployment_id=${dep_uid}" +echo " url=${dep_url}" +echo " createdAt=${created}" +echo "Lookback note: runtime logs use deployment ${dep_uid} as the log source; timestamps are filtered to LOOKBACK_MINUTES." + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Resolve step completed. 
Issues saved to $OUTPUT_FILE" diff --git a/codebundles/vercel-project-http-error-health/vercel-sli-health.sh b/codebundles/vercel-project-http-error-health/vercel-sli-health.sh new file mode 100755 index 00000000..094da4e7 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-sli-health.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +set -euo pipefail +# Lightweight SLI snapshot: binary 5xx and 4xx health, aggregated score (stdout JSON only). + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +LOOKBACK_MINUTES="${LOOKBACK_MINUTES:-60}" +ERROR_RATE_THRESHOLD_PCT="${ERROR_RATE_THRESHOLD_PCT:-1}" +MIN_ERROR_EVENTS="${MIN_ERROR_EVENTS:-5}" +EXCLUDE_404_FROM_4XX="${EXCLUDE_404_FROM_4XX:-true}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" +# shellcheck source=vercel-analyze-common.sh +source "${SCRIPT_DIR}/vercel-analyze-common.sh" + +vercel_compute_since_until_ms + +score_5xx=1 +score_4xx=1 + +if ! 
vercel_resolve_project_and_deployment_ids; then + echo '{"score_5xx":0,"score_4xx":0,"aggregate":0,"detail":"resolve_failed"}' + exit 0 +fi + +LOGF=$(mktemp) +trap 'rm -f "$LOGF"' EXIT + +vercel_fetch_runtime_logs_file "$VERCEL_PROJECT_ID" "$VERCEL_DEPLOYMENT_ID" "$SINCE_MS" "$UNTIL_MS" "$LOGF" 1500 || true + +filtered="$(vercel_filter_request_logs_json "$LOGF" "$SINCE_MS")" +total=$(echo "$filtered" | jq 'length') +five=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 500 and .responseStatusCode <= 599)] | length') + +if [ "${EXCLUDE_404_FROM_4XX}" = "true" ] || [ "${EXCLUDE_404_FROM_4XX}" = "True" ]; then + four=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499 and .responseStatusCode != 404)] | length') +else + four=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499)] | length') +fi + +rate_5=$(echo "$filtered" | jq -r --argjson f "$five" --argjson t "$total" 'if ($t > 0) then (($f * 100) / $t) else 0 end') +rate_4=$(echo "$filtered" | jq -r --argjson f "$four" --argjson t "$total" 'if ($t > 0) then (($f * 100) / $t) else 0 end') + +th="${ERROR_RATE_THRESHOLD_PCT}" +over5=$(awk -v r="$rate_5" -v t="$th" 'BEGIN{ if (r+0 > t+0) print 1; else print 0 }') +over4=$(awk -v r="$rate_4" -v t="$th" 'BEGIN{ if (r+0 > t+0) print 1; else print 0 }') + +# Mirror runbook: need min events for a failing score when rate high +if [ "$over5" -eq 1 ] && [ "$five" -ge "${MIN_ERROR_EVENTS}" ]; then score_5xx=0; fi +if [ "$over4" -eq 1 ] && [ "$four" -ge "${MIN_ERROR_EVENTS}" ]; then score_4xx=0; fi + +aggregate=$(awk -v a="$score_5xx" -v b="$score_4xx" 'BEGIN{ printf "%.4f", (a+b)/2 }') +detail="samples=${total} 5xx=${five}(${rate_5}%) 4xx=${four}(${rate_4}%)" + +jq -n \ + --argjson s5 "$score_5xx" \ + --argjson s4 "$score_4xx" \ + --arg agg "$aggregate" \ + --arg d "$detail" \ + '{score_5xx: $s5, score_4xx: $s4, aggregate: ($agg | tonumber), detail: $d}' diff --git 
a/codebundles/vercel-project-http-error-health/vercel-summarize-4xx-rate.sh b/codebundles/vercel-project-http-error-health/vercel-summarize-4xx-rate.sh new file mode 100755 index 00000000..c12dadff --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-summarize-4xx-rate.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +LOOKBACK_MINUTES="${LOOKBACK_MINUTES:-60}" +ERROR_RATE_THRESHOLD_PCT="${ERROR_RATE_THRESHOLD_PCT:-1}" +MIN_ERROR_EVENTS="${MIN_ERROR_EVENTS:-5}" +EXCLUDE_404_FROM_4XX="${EXCLUDE_404_FROM_4XX:-true}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" +# shellcheck source=vercel-analyze-common.sh +source "${SCRIPT_DIR}/vercel-analyze-common.sh" + +OUTPUT_FILE="vercel_4xx_issues.json" +issues_json='[]' + +vercel_compute_since_until_ms + +if ! vercel_resolve_project_and_deployment_ids; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Analyze 4xx Rate — Project or Deployment Unavailable for \`${VERCEL_PROJECT}\`" \ + --arg details "Failed to resolve project id or latest READY production deployment." \ + --arg severity "3" \ + --arg next_steps "Run validate and resolve tasks; confirm production deployment exists." \ + '. 
+= [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 0 +fi + +LOGF=$(mktemp) +trap 'rm -f "$LOGF"' EXIT + +vercel_fetch_runtime_logs_file "$VERCEL_PROJECT_ID" "$VERCEL_DEPLOYMENT_ID" "$SINCE_MS" "$UNTIL_MS" "$LOGF" 5000 || true + +filtered="$(vercel_filter_request_logs_json "$LOGF" "$SINCE_MS")" + +if [ "${EXCLUDE_404_FROM_4XX}" = "true" ] || [ "${EXCLUDE_404_FROM_4XX}" = "True" ]; then + four=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499 and .responseStatusCode != 404)] | length') +else + four=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499)] | length') +fi + +total=$(echo "$filtered" | jq 'length') +rate_pct=$(echo "$filtered" | jq -r --argjson f "$four" --argjson t "$total" 'if ($t > 0) then (($f * 100) / $t) else 0 end') + +echo "4xx summary (EXCLUDE_404_FROM_4XX=${EXCLUDE_404_FROM_4XX}):" +echo " total_request_rows=${total} 4xx_count=${four} rate_percent=${rate_pct}" +echo " lookback_minutes=${LOOKBACK_MINUTES} threshold_pct=${ERROR_RATE_THRESHOLD_PCT} min_events=${MIN_ERROR_EVENTS}" + +th="${ERROR_RATE_THRESHOLD_PCT}" +over_rate=$(awk -v r="$rate_pct" -v t="$th" 'BEGIN{ if (r+0 > t+0) print 1; else print 0 }') + +if [ "$over_rate" -eq 1 ] && [ "$four" -ge "${MIN_ERROR_EVENTS}" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Elevated 4xx Client Error Rate for Vercel Project \`${VERCEL_PROJECT}\`" \ + --arg details "Sampled ${total} rows with ${four} client errors (~${rate_pct}%, threshold ${th}%). 404 excluded: ${EXCLUDE_404_FROM_4XX}." \ + --arg severity "3" \ + --arg next_steps "Review validation/auth and routing for high-volume 4xx paths; compare top 4xx paths task." \ + '. 
+= [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') +elif [ "$over_rate" -eq 1 ] && [ "$four" -lt "${MIN_ERROR_EVENTS}" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "4xx Rate Above Threshold but Below Minimum Event Count for \`${VERCEL_PROJECT}\`" \ + --arg details "Rate ${rate_pct}% exceeds ${th}% but only ${four} events (< MIN_ERROR_EVENTS)." \ + --arg severity "2" \ + --arg next_steps "Tune MIN_ERROR_EVENTS or LOOKBACK_MINUTES to reduce noise." \ + '. += [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') +fi + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE" diff --git a/codebundles/vercel-project-http-error-health/vercel-summarize-5xx-rate.sh b/codebundles/vercel-project-http-error-health/vercel-summarize-5xx-rate.sh new file mode 100755 index 00000000..0fc0433f --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-summarize-5xx-rate.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +LOOKBACK_MINUTES="${LOOKBACK_MINUTES:-60}" +ERROR_RATE_THRESHOLD_PCT="${ERROR_RATE_THRESHOLD_PCT:-1}" +MIN_ERROR_EVENTS="${MIN_ERROR_EVENTS:-5}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" +# shellcheck source=vercel-analyze-common.sh +source "${SCRIPT_DIR}/vercel-analyze-common.sh" + +OUTPUT_FILE="vercel_5xx_issues.json" +issues_json='[]' + +vercel_compute_since_until_ms + +if ! vercel_resolve_project_and_deployment_ids; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Analyze 5xx Rate — Project or Deployment Unavailable for \`${VERCEL_PROJECT}\`" \ + --arg details "Failed to resolve project id or latest READY production deployment." 
\ + --arg severity "3" \ + --arg next_steps "Run validate and resolve tasks; confirm production deployment exists." \ + '. += [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 0 +fi + +LOGF=$(mktemp) +trap 'rm -f "$LOGF"' EXIT + +vercel_fetch_runtime_logs_file "$VERCEL_PROJECT_ID" "$VERCEL_DEPLOYMENT_ID" "$SINCE_MS" "$UNTIL_MS" "$LOGF" 5000 || true + +filtered="$(vercel_filter_request_logs_json "$LOGF" "$SINCE_MS")" +total=$(echo "$filtered" | jq 'length') +five=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 500 and .responseStatusCode <= 599)] | length') +rate_pct=$(echo "$filtered" | jq -r --argjson f "$five" --argjson t "$total" 'if ($t > 0) then (($f * 100) / $t) else 0 end') + +echo "5xx summary (sampled runtime logs, deployment ${VERCEL_DEPLOYMENT_ID}):" +echo " total_request_rows=${total} 5xx_count=${five} rate_percent=${rate_pct}" +echo " lookback_minutes=${LOOKBACK_MINUTES} threshold_pct=${ERROR_RATE_THRESHOLD_PCT} min_events=${MIN_ERROR_EVENTS}" + +th="${ERROR_RATE_THRESHOLD_PCT}" +# bc or awk for float compare +over_rate=$(awk -v r="$rate_pct" -v t="$th" 'BEGIN{ if (r+0 > t+0) print 1; else print 0 }') + +if [ "$over_rate" -eq 1 ] && [ "$five" -ge "${MIN_ERROR_EVENTS}" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Elevated 5xx Rate for Vercel Project \`${VERCEL_PROJECT}\`" \ + --arg details "In the last ${LOOKBACK_MINUTES} minutes (deployment ${VERCEL_DEPLOYMENT_ID}), sampled ${total} request log rows with ${five} server errors (~${rate_pct}% > threshold ${th}%)." \ + --arg severity "3" \ + --arg next_steps "Inspect failing routes and upstream dependencies; check recent deploys and function logs. Review top 5xx paths task output." \ + '. 
+= [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') +elif [ "$over_rate" -eq 1 ] && [ "$five" -lt "${MIN_ERROR_EVENTS}" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "5xx Rate Above Threshold but Below Minimum Event Count for \`${VERCEL_PROJECT}\`" \ + --arg details "Rate ${rate_pct}% exceeds ${th}% but only ${five} events (< MIN_ERROR_EVENTS=${MIN_ERROR_EVENTS}). Sample size may be noisy." \ + --arg severity "2" \ + --arg next_steps "Lower MIN_ERROR_EVENTS or widen LOOKBACK_MINUTES if this signal is too sensitive." \ + '. += [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') +fi + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE" diff --git a/codebundles/vercel-project-http-error-health/vercel-top-paths-4xx.sh b/codebundles/vercel-project-http-error-health/vercel-top-paths-4xx.sh new file mode 100755 index 00000000..34ec0f87 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-top-paths-4xx.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +LOOKBACK_MINUTES="${LOOKBACK_MINUTES:-60}" +EXCLUDE_404_FROM_4XX="${EXCLUDE_404_FROM_4XX:-true}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" +# shellcheck source=vercel-analyze-common.sh +source "${SCRIPT_DIR}/vercel-analyze-common.sh" + +OUTPUT_FILE="vercel_top_4xx_issues.json" +issues_json='[]' + +vercel_compute_since_until_ms + +if ! vercel_resolve_project_and_deployment_ids; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot List 4xx Paths — Project or Deployment Unavailable for \`${VERCEL_PROJECT}\`" \ + --arg details "Failed to resolve project or deployment for log analysis." 
\ + --arg severity "3" \ + --arg next_steps "Run validate and resolve-deployment tasks first." \ + '. += [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 0 +fi + +LOGF=$(mktemp) +trap 'rm -f "$LOGF"' EXIT + +vercel_fetch_runtime_logs_file "$VERCEL_PROJECT_ID" "$VERCEL_DEPLOYMENT_ID" "$SINCE_MS" "$UNTIL_MS" "$LOGF" 5000 || true + +filtered="$(vercel_filter_request_logs_json "$LOGF" "$SINCE_MS")" + +if [ "${EXCLUDE_404_FROM_4XX}" = "true" ] || [ "${EXCLUDE_404_FROM_4XX}" = "True" ]; then + top_json=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499 and .responseStatusCode != 404) | (.requestPath // "/")] | group_by(.) | map({path: .[0], count: length}) | sort_by(-.count) | .[0:15]') +else + top_json=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 400 and .responseStatusCode <= 499) | (.requestPath // "/")] | group_by(.) | map({path: .[0], count: length}) | sort_by(-.count) | .[0:15]') +fi + +echo "Top paths by 4xx count (EXCLUDE_404_FROM_4XX=${EXCLUDE_404_FROM_4XX}, deployment ${VERCEL_DEPLOYMENT_ID}):" +echo "$top_json" | jq . 
+ +echo "$issues_json" > "$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE (informational; issues list typically empty)" diff --git a/codebundles/vercel-project-http-error-health/vercel-top-paths-5xx.sh b/codebundles/vercel-project-http-error-health/vercel-top-paths-5xx.sh new file mode 100755 index 00000000..58b01499 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-top-paths-5xx.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +LOOKBACK_MINUTES="${LOOKBACK_MINUTES:-60}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" +# shellcheck source=vercel-analyze-common.sh +source "${SCRIPT_DIR}/vercel-analyze-common.sh" + +OUTPUT_FILE="vercel_top_5xx_issues.json" +issues_json='[]' + +vercel_compute_since_until_ms + +if ! vercel_resolve_project_and_deployment_ids; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot List 5xx Paths — Project or Deployment Unavailable for \`${VERCEL_PROJECT}\`" \ + --arg details "Failed to resolve project or deployment for log analysis." \ + --arg severity "3" \ + --arg next_steps "Run validate and resolve-deployment tasks first." \ + '. += [{"title": $title, "details": $details, "severity": ($severity | tonumber), "next_steps": $next_steps}]') + echo "$issues_json" > "$OUTPUT_FILE" + exit 0 +fi + +LOGF=$(mktemp) +trap 'rm -f "$LOGF"' EXIT + +vercel_fetch_runtime_logs_file "$VERCEL_PROJECT_ID" "$VERCEL_DEPLOYMENT_ID" "$SINCE_MS" "$UNTIL_MS" "$LOGF" 5000 || true + +filtered="$(vercel_filter_request_logs_json "$LOGF" "$SINCE_MS")" +top_json=$(echo "$filtered" | jq '[.[] | select(.responseStatusCode >= 500 and .responseStatusCode <= 599) | (.requestPath // "/")] | group_by(.) 
| map({path: .[0], count: length}) | sort_by(-.count) | .[0:15]') + +echo "Top paths by 5xx count (deployment ${VERCEL_DEPLOYMENT_ID}, lookback ${LOOKBACK_MINUTES}m):" +echo "$top_json" | jq . + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Wrote $OUTPUT_FILE (informational; issues list typically empty)" diff --git a/codebundles/vercel-project-http-error-health/vercel-validate-project.sh b/codebundles/vercel-project-http-error-health/vercel-validate-project.sh new file mode 100755 index 00000000..46f855f7 --- /dev/null +++ b/codebundles/vercel-project-http-error-health/vercel-validate-project.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +: "${VERCEL_TEAM_ID:?Must set VERCEL_TEAM_ID}" +: "${VERCEL_PROJECT:?Must set VERCEL_PROJECT}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=vercel-lib.sh +source "${SCRIPT_DIR}/vercel-lib.sh" + +OUTPUT_FILE="vercel_validate_issues.json" +issues_json='[]' + +enc_proj="$(vercel_urlencode "${VERCEL_PROJECT}")" +enc_tid="$(vercel_urlencode "${VERCEL_TEAM_ID}")" +url="${VERCEL_API_BASE}/v9/projects/${enc_proj}?teamId=${enc_tid}" + +if ! raw="$(vercel_http_get "$url")"; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Cannot Reach Vercel API for Project \`${VERCEL_PROJECT}\`" \ + --arg details "Network or TLS error calling ${url}" \ + --arg severity "4" \ + --arg next_steps "Verify outbound HTTPS access to api.vercel.com and retry. Confirm VERCEL_API_TOKEN is injected." \ + '. += [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') + echo "$issues_json" > "$OUTPUT_FILE" + echo "Validation failed (curl error). 
Issues written to $OUTPUT_FILE" + exit 0 +fi + +http_code=$(echo "$raw" | tail -n1) +body=$(echo "$raw" | sed '$d') + +if [ "$http_code" = "401" ] || [ "$http_code" = "403" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Vercel API Authorization Failed for Project \`${VERCEL_PROJECT}\`" \ + --arg details "HTTP ${http_code}: $(echo "$body" | jq -c . 2>/dev/null || echo "$body")" \ + --arg severity "4" \ + --arg next_steps "Create or rotate a Vercel token with team access. Confirm VERCEL_TEAM_ID matches the token scope." \ + '. += [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') +elif [ "$http_code" = "404" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Vercel Project Not Found: \`${VERCEL_PROJECT}\`" \ + --arg details "HTTP 404 from GET /v9/projects. Response: $(echo "$body" | head -c 2000)" \ + --arg severity "4" \ + --arg next_steps "Verify VERCEL_PROJECT is the slug or id under team VERCEL_TEAM_ID. Check team switcher matches VERCEL_TEAM_ID." \ + '. += [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') +elif [ "$http_code" != "200" ]; then + issues_json=$(echo "$issues_json" | jq \ + --arg title "Unexpected Vercel API Response for Project \`${VERCEL_PROJECT}\`" \ + --arg details "HTTP ${http_code}: $(echo "$body" | head -c 2000)" \ + --arg severity "3" \ + --arg next_steps "Retry later, check Vercel status page, and confirm API compatibility." \ + '. += [{ + "title": $title, + "details": $details, + "severity": ($severity | tonumber), + "next_steps": $next_steps + }]') +fi + +if [ "$http_code" = "200" ]; then + pid=$(echo "$body" | jq -r '.id // empty') + pname=$(echo "$body" | jq -r '.name // empty') + echo "Resolved Vercel project: id=${pid} name=${pname}" +fi + +echo "$issues_json" > "$OUTPUT_FILE" +echo "Validation finished. Issues saved to $OUTPUT_FILE"