diff --git a/.github/workflows/deploy-azure.yml b/.github/workflows/deploy-azure.yml index 4a7207d..9b51b44 100644 --- a/.github/workflows/deploy-azure.yml +++ b/.github/workflows/deploy-azure.yml @@ -52,6 +52,18 @@ jobs: steps: - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + - name: Enable Corepack + run: corepack enable # pnpm must be on PATH before setup-node resolves the pnpm cache + + - name: Set up Node + uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5 + with: + node-version: "24" + cache: pnpm + + - name: Install smoke dependencies + run: pnpm install --frozen-lockfile + - name: Install OpenTofu uses: opentofu/setup-opentofu@847eaa4afeb791b06daa46e8eafa8b1b68d7cfb4 # v2.0.1 @@ -141,6 +153,7 @@ jobs: { echo "resource_group_name=$(tofu output -raw resource_group_name)" + echo "app_endpoint=$(tofu output -raw app_endpoint)" echo "app_container_app_name=$(tofu output -raw app_container_app_name)" echo "worker_container_app_name=$(tofu output -raw worker_container_app_name)" echo "migration_job_name=$(tofu output -raw migration_job_name)" @@ -251,6 +264,41 @@ jobs: echo "worker_revision=$worker_revision" } >> "$GITHUB_OUTPUT" + - name: Smoke verify deployment + id: smoke + if: ${{ steps.promote-app-worker.outcome == 'success' }} + shell: bash + env: + TARGET_ENVIRONMENT: ${{ inputs.environment }} + APP_ENDPOINT: ${{ steps.provision.outputs.app_endpoint }} + RESOURCE_GROUP: ${{ steps.provision.outputs.resource_group_name }} + APP_CONTAINER_APP_NAME: ${{ steps.provision.outputs.app_container_app_name }} + WORKER_CONTAINER_APP_NAME: ${{ steps.provision.outputs.worker_container_app_name }} + MIGRATION_JOB_NAME: ${{ steps.provision.outputs.migration_job_name }} + MIGRATION_EXECUTION: ${{ steps.migrate.outputs.migration_execution }} + run: | + set +e + output="$(pnpm run smoke:azure -- \ + --environment "$TARGET_ENVIRONMENT" \ + --app-endpoint "$APP_ENDPOINT" \ + --resource-group "$RESOURCE_GROUP" \ + --app-name "$APP_CONTAINER_APP_NAME" \ + --worker-name "$WORKER_CONTAINER_APP_NAME" \ 
+ --migration-job-name "$MIGRATION_JOB_NAME" \ + --migration-execution "$MIGRATION_EXECUTION" 2>&1)" + status=$? + set -e + + printf '%s\n' "$output" + { + echo "### Azure deployment smoke" + echo + echo '```text' + printf '%s\n' "$output" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + exit "$status" + - name: Report deployment id: report if: ${{ always() }} @@ -261,6 +309,7 @@ jobs: MIGRATION_EXECUTION: ${{ steps.migrate.outputs.migration_execution || 'n/a' }} APP_REVISION: ${{ steps.promote-app-worker.outputs.app_revision || 'not-promoted' }} WORKER_REVISION: ${{ steps.promote-app-worker.outputs.worker_revision || 'not-promoted' }} + SMOKE_RESULT: ${{ steps.smoke.outcome || 'not-run' }} run: | { echo "### Azure deployment" @@ -270,4 +319,5 @@ jobs: echo "- Migration execution: $MIGRATION_EXECUTION" echo "- App revision: $APP_REVISION" echo "- Worker revision: $WORKER_REVISION" + echo "- Smoke result: $SMOKE_RESULT" } >> "$GITHUB_STEP_SUMMARY" diff --git a/.specify/feature.json b/.specify/feature.json index 24ec852..52c047d 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1,3 +1,3 @@ { - "feature_directory": "specs/019-logging-standardization" + "feature_directory": "specs/020-deploy-smoke-verification" } diff --git a/AGENTS.md b/AGENTS.md index 8c94cea..43ccdb8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # webapp-template Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-06-10 +Auto-generated from all feature plans. Last updated: 2026-06-11 ## Workflow First Step @@ -10,6 +10,9 @@ Auto-generated from all feature plans. 
Last updated: 2026-06-10 ## Active Technologies +- TypeScript 5.9 on Node.js via the existing `tsx` dev dependency + Node built-ins, existing `tsx`, Azure CLI available in deployment runners (020-deploy-smoke-verification) +- No new storage; smoke evidence remains command output and GitHub step summary (020-deploy-smoke-verification) + - TypeScript 5.9 on Next.js 16 App Router with React 19; Python 3.12 worker; PowerShell/Node validation scripts + Existing `src/lib/logger.ts`, `src/proxy.ts`, `src/instrumentation.ts`, Prisma-backed services, Python stdlib `logging`/`json`, Vitest, Playwright, existing validation scripts (019-logging-standardization) - No new storage; operational logs remain process output; audit records remain Prisma-backed and separate (019-logging-standardization) @@ -38,11 +41,11 @@ TypeScript 5.9 on Next.js 16 App Router (React 19): Follow standard conventions ## Recent Changes +- 020-deploy-smoke-verification: Added TypeScript 5.9 on Node.js via the existing `tsx` dev dependency + Node built-ins, existing `tsx`, Azure CLI available in deployment runners + - 019-logging-standardization: Added TypeScript 5.9 on Next.js 16 App Router with React 19; Python 3.12 worker; PowerShell/Node validation scripts + Existing `src/lib/logger.ts`, `src/proxy.ts`, `src/instrumentation.ts`, Prisma-backed services, Python stdlib `logging`/`json`, Vitest, Playwright, existing validation scripts - 017-deepsec-remediation: Added TypeScript 5.9 on Next.js 16 App Router, React 19, Python 3.12 worker where affected, PowerShell validation scripts + Prisma 7, Better Auth, Zod, Vitest, Playwright, GitHub Actions, GoReleaser, DeepSec 2.0.12 -- 011-route-refactor: Added TypeScript 5.9 on Next.js 16 App Router (React 19) + Next.js 16, React 19, Prisma 7, Better Auth, Zod, Vitest, Playwright, jscpd - diff --git a/CONTINUE.md b/CONTINUE.md index e2170e1..c0c9850 100644 --- a/CONTINUE.md +++ b/CONTINUE.md @@ -1,15 +1,15 @@ # Continue - + ## Current Snapshot -- Updated: 
2026-06-11 09:27:00 -- Branch: `main` +- Updated: 2026-06-11 11:06:34 +- Branch: `020-deploy-smoke-verification` ## Recent Non-Continuity Commits -- 34de987 chore: record clean handoff +- 3d52264 fix: move state queue logging to dedicated resource - 25306fd chore: refresh specs overview - dd226de test: update opentofu action pin assertion - 9b92cb5 ci: update opentofu setup action @@ -17,9 +17,20 @@ ## Git Status -- M CONTINUE.md -- M CONTINUE_LOG.md -- M infra/azure/bootstrap/main.tf +- M .github/workflows/deploy-azure.yml +- M .specify/feature.json +- M ACTIVE_SPECS.md +- M AGENTS.md +- M package.json +- M specs/018-opentofu-azure-infra/quickstart.md +- M specs/OVERVIEW.md +- M tests/unit/security/deploy-workflow.test.ts +- ?? docs/azure-deploy-smoke.md +- ?? scripts/azure-deploy-smoke.ts +- ?? scripts/run-azure-deploy-smoke.mjs +- ?? specs/020-deploy-smoke-verification/ +- ?? tests/integration/azure-deploy-smoke-cli.test.ts +- ?? tests/unit/azure-deploy-smoke.test.ts ## Active Specs @@ -27,6 +38,6 @@ ## Next Recommended Actions -1. Commit and push the OpenTofu AzureRM v5 compatibility cleanup. -2. Confirm main validation after the cleanup commit. -3. Start the next feature/spec. +1. Commit and push `020-deploy-smoke-verification`. +2. Open a pull request for the deployment smoke verification feature. +3. Confirm GitHub Actions validation, then merge if green. diff --git a/CONTINUE_LOG.md b/CONTINUE_LOG.md index 17fd29d..d7085b3 100644 --- a/CONTINUE_LOG.md +++ b/CONTINUE_LOG.md @@ -1493,3 +1493,19 @@ - OpenTofu infrastructure validation no longer emits the AzureRM queue properties deprecation warning. - Active specs: none. - Next focus: commit/push this maintenance cleanup and confirm main validation. + +## 2026-06-11 10:53:57 + +- Branch snapshot refreshed for `020-deploy-smoke-verification`. +- Latest non-continuity commit: 3d52264 fix: move state queue logging to dedicated resource. +- Active specs: 018-opentofu-azure-infra, 020-deploy-smoke-verification. 
+- Next focus: 020-deploy-smoke-verification: T020. + +## 2026-06-11 11:06:34 + +- Implemented spec `020-deploy-smoke-verification` on branch `020-deploy-smoke-verification`. +- Added `pnpm run smoke:azure` with a TypeScript smoke verifier for app health, migration execution, app revision health, and worker revision health. +- Wired smoke verification into `.github/workflows/deploy-azure.yml` after app/worker revision promotion and added operator documentation. +- Validation passed: focused smoke/workflow tests and `.\validate.ps1 all`. +- Active specs: none. +- Next focus: commit/push the feature branch, open a PR, and confirm GitHub Actions validation. diff --git a/docs/azure-deploy-smoke.md b/docs/azure-deploy-smoke.md new file mode 100644 index 0000000..ec82fc4 --- /dev/null +++ b/docs/azure-deploy-smoke.md @@ -0,0 +1,46 @@ +# Azure Deploy Smoke Verification + +Use the Azure deploy smoke check after a deployment to prove that the app endpoint, migration job, web revision, and worker revision are healthy. + +## Prerequisites + +- Azure CLI authenticated for the target subscription. +- Values from the Azure OpenTofu outputs or the deployment workflow summary. +- Project dependencies installed with `pnpm install --frozen-lockfile`. + +## Local Run + +```bash +cd infra/azure + +pnpm --dir ../.. run smoke:azure -- \ + --environment dev \ + --app-endpoint "$(tofu output -raw app_endpoint)" \ + --resource-group "$(tofu output -raw resource_group_name)" \ + --app-name "$(tofu output -raw app_container_app_name)" \ + --worker-name "$(tofu output -raw worker_container_app_name)" \ + --migration-job-name "$(tofu output -raw migration_job_name)" +``` + +The command exits `0` only when all required checks pass. It exits `1` when a smoke check fails and `2` when configuration is missing or invalid. + +## JSON Output + +Use `--json` when another tool needs the smoke result. + +```bash +pnpm run smoke:azure -- --json --environment dev ... 
+``` + +The JSON report includes the target environment, overall status, and one result per check. Output is sanitized before printing. + +## GitHub Actions + +The `Deploy Azure` workflow runs smoke verification after migration success and app/worker revision promotion. The workflow fails if smoke verification fails, and the GitHub step summary includes the sanitized smoke output. + +## Troubleshooting + +- `app-health` failed: open the checked `/api/health` URL, inspect database health, and check recent app logs. +- `migration` failed: inspect the named Container Apps Job execution. +- `app-revision` or `worker-revision` failed: inspect active revisions for the named Container App. +- Configuration failed: confirm the workflow variables and OpenTofu outputs match the target environment. diff --git a/package.json b/package.json index 5e791fe..1e84773 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "quality:python": "node scripts/check-python-quality.mjs", "quality:cli": "node scripts/check-cli-quality.mjs", "logging:guard": "node scripts/check-logging-guard.mjs", + "smoke:azure": "node scripts/run-azure-deploy-smoke.mjs", "supply-chain:audit": "pwsh -NoProfile -ExecutionPolicy Bypass -File scripts/supply-chain-audit.ps1", "validate:runtime-credentials": "pwsh -NoProfile -ExecutionPolicy Bypass -File scripts/validate-runtime-credentials.ps1 -SelfTest", "worker:lint": "cd worker && uv run ruff check src tests", diff --git a/scripts/azure-deploy-smoke.ts b/scripts/azure-deploy-smoke.ts new file mode 100644 index 0000000..089f3b1 --- /dev/null +++ b/scripts/azure-deploy-smoke.ts @@ -0,0 +1,510 @@ +import { spawnSync } from "node:child_process"; +import { fileURLToPath } from "node:url"; + +export type SmokeStatus = "pass" | "fail"; + +export type SmokeTarget = { + environment: string; + appEndpoint: string; + resourceGroup: string; + appName: string; + workerName: string; + migrationJobName: string; + migrationExecutionName?: string; + timeoutSeconds: 
number; + json: boolean; +}; + +export type SmokeCheck = { + name: string; + status: SmokeStatus; + target: string; + durationMs: number; + message: string; + details?: Record; +}; + +export type SmokeReport = { + environment: string; + startedAt: string; + finishedAt: string; + status: SmokeStatus; + checks: SmokeCheck[]; +}; + +export type HttpResponse = { + status: number; + body: unknown; +}; + +export type HttpRunner = ( + url: string, + timeoutMs: number, +) => Promise; + +export type CommandResult = { + status: number; + stdout: string; + stderr: string; +}; + +export type CommandRunner = (args: string[]) => Promise; + +export type SmokeRunners = { + http: HttpRunner; + command: CommandRunner; + now?: () => Date; +}; + +type ParsedArgs = { + values: Record; + json: boolean; +}; + +const DEFAULT_TIMEOUT_SECONDS = 120; +const SECRET_PATTERNS = [ + /(password|passwd|pwd|secret|token|key|connectionstring|connection_string)=([^&\s"']+)/gi, + /(Bearer\s+)[A-Za-z0-9._~+/=-]+/gi, +]; + +export class SmokeConfigError extends Error {} + +export function buildHealthUrl(appEndpoint: string): string { + const url = new URL(appEndpoint); + const basePath = url.pathname.replace(/\/+$/, ""); + url.pathname = `${basePath}/api/health`.replace(/\/{2,}/g, "/"); + url.search = ""; + url.hash = ""; + return url.toString(); +} + +export function sanitize(value: unknown): unknown { + if (typeof value === "string") { + return SECRET_PATTERNS.reduce( + (text, pattern) => + text.replace(pattern, (_match, prefix) => `${prefix}[REDACTED]`), + value, + ); + } + + if (Array.isArray(value)) { + return value.map((item) => sanitize(item)); + } + + if (value && typeof value === "object") { + return Object.fromEntries( + Object.entries(value).map(([key, item]) => [ + key, + /password|passwd|pwd|secret|token|key|connection|string/i.test(key) + ? 
"[REDACTED]" + : sanitize(item), + ]), + ); + } + + return value; +} + +/** + * Builds the smoke target from CLI flags and environment variables. + * A CLI flag wins over its environment variable when both are supplied. + * + * @throws SmokeConfigError when a required setting is missing, the timeout is + * outside 1-1800 seconds, or app-endpoint is not an absolute HTTP(S) URL. + */ +export function parseSmokeTarget( + argv: string[], + env: NodeJS.ProcessEnv, +): SmokeTarget { + const parsed = parseArgs(argv); + const read = (flag: string, envName: string) => + parsed.values[flag] ?? env[envName]; + const timeoutRaw = + read("timeout-seconds", "SMOKE_TIMEOUT_SECONDS") ?? + `${DEFAULT_TIMEOUT_SECONDS}`; + const timeoutSeconds = Number(timeoutRaw); + const target: SmokeTarget = { + environment: requireValue( + read("environment", "SMOKE_ENVIRONMENT"), + "environment", + ), + appEndpoint: requireValue( + read("app-endpoint", "SMOKE_APP_ENDPOINT"), + "app-endpoint", + ), + resourceGroup: requireValue( + read("resource-group", "SMOKE_RESOURCE_GROUP"), + "resource-group", + ), + appName: requireValue(read("app-name", "SMOKE_APP_NAME"), "app-name"), + workerName: requireValue( + read("worker-name", "SMOKE_WORKER_NAME"), + "worker-name", + ), + migrationJobName: requireValue( + read("migration-job-name", "SMOKE_MIGRATION_JOB_NAME"), + "migration-job-name", + ), + /* A blank value (for example an unset CI expression exported as "") must behave like "not supplied" so the latest execution is looked up instead of querying an execution named "". */ + migrationExecutionName: + read("migration-execution", "SMOKE_MIGRATION_EXECUTION")?.trim() || + undefined, + timeoutSeconds, + json: parsed.json || env.SMOKE_JSON === "true", + }; + + if ( + !Number.isFinite(timeoutSeconds) || + timeoutSeconds <= 0 || + timeoutSeconds > 1800 + ) { + throw new SmokeConfigError("timeout-seconds must be between 1 and 1800"); + } + + try { + const endpoint = new URL(target.appEndpoint); + if (!["http:", "https:"].includes(endpoint.protocol)) { + throw new Error("invalid protocol"); + } + } catch { + throw new SmokeConfigError( + "app-endpoint must be an absolute HTTP or HTTPS URL", + ); + } + + return target; +} + +export async function runSmoke( + target: SmokeTarget, + runners: SmokeRunners, +): Promise { + const now = runners.now ?? 
(() => new Date()); + const startedAt = now().toISOString(); + const checks: SmokeCheck[] = []; + + checks.push( + await timeCheck("app-health", buildHealthUrl(target.appEndpoint), () => + checkAppHealth(target, runners.http), + ), + ); + checks.push( + await timeCheck("migration", target.migrationJobName, () => + checkMigration(target, runners.command), + ), + ); + checks.push( + await timeCheck("app-revision", target.appName, () => + checkRevision(target, target.appName, runners.command), + ), + ); + checks.push( + await timeCheck("worker-revision", target.workerName, () => + checkRevision(target, target.workerName, runners.command), + ), + ); + + const finishedAt = now().toISOString(); + return sanitize({ + environment: target.environment, + startedAt, + finishedAt, + status: checks.every((check) => check.status === "pass") ? "pass" : "fail", + checks, + }) as SmokeReport; +} + +export function renderHumanReport(report: SmokeReport): string { + const lines = [`Azure deployment smoke: ${report.environment}`]; + for (const check of report.checks) { + lines.push( + `${check.status.toUpperCase()} ${check.name} ${check.target} - ${check.message}`, + ); + if (check.status === "fail" && check.details) { + lines.push(` details: ${JSON.stringify(check.details)}`); + } + } + lines.push(`Result: ${report.status.toUpperCase()}`); + return lines.join("\n"); +} + +export async function main( + argv = process.argv.slice(2), + env = process.env, + runners: SmokeRunners = { + http: defaultHttpRunner, + command: defaultCommandRunner, + }, +): Promise { + try { + const target = parseSmokeTarget(argv, env); + const report = await runSmoke(target, runners); + const output = target.json + ? JSON.stringify(report, null, 2) + : renderHumanReport(report); + console.log(output); + return report.status === "pass" ? 0 : 1; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + console.error( + `Azure deployment smoke configuration failed: ${sanitize(message)}`, + ); + return 2; + } +} + +async function checkAppHealth( + target: SmokeTarget, + http: HttpRunner, +): Promise> { + const url = buildHealthUrl(target.appEndpoint); + const response = await http(url, target.timeoutSeconds * 1000); + const body = response.body as { status?: unknown }; + if (response.status === 200 && body?.status === "ok") { + return { + name: "app-health", + status: "pass", + target: url, + message: "Health endpoint returned ok", + }; + } + + return { + name: "app-health", + status: "fail", + target: url, + message: `Health endpoint returned ${response.status}`, + details: { body }, + }; +} + +async function checkMigration( + target: SmokeTarget, + command: CommandRunner, +): Promise> { + const executionName = + target.migrationExecutionName ?? + (await latestMigrationExecution(target, command)); + const result = await command([ + "containerapp", + "job", + "execution", + "show", + "--resource-group", + target.resourceGroup, + "--job-name", + target.migrationJobName, + "--name", + executionName, + "--query", + "properties.status", + "--output", + "tsv", + ]); + const status = result.stdout.trim(); + if (result.status === 0 && status === "Succeeded") { + return { + name: "migration", + status: "pass", + target: `${target.migrationJobName}/${executionName}`, + message: "Migration execution succeeded", + }; + } + + return { + name: "migration", + status: "fail", + target: `${target.migrationJobName}/${executionName}`, + message: status + ? 
`Migration execution status is ${status}` + : "Migration execution could not be read", + details: { stdout: result.stdout, stderr: result.stderr }, + }; +} + +async function checkRevision( + target: SmokeTarget, + appName: string, + command: CommandRunner, +): Promise> { + const result = await command([ + "containerapp", + "revision", + "list", + "--resource-group", + target.resourceGroup, + "--name", + appName, + "--query", + "[?properties.active==`true`]", + "--output", + "json", + ]); + + if (result.status !== 0) { + return { + name: appName === target.appName ? "app-revision" : "worker-revision", + status: "fail", + target: `${appName}/active`, + message: "Active revisions could not be read", + details: { stderr: result.stderr }, + }; + } + + const revisions = parseJsonArray(result.stdout); + const unhealthy = revisions.filter( + (revision) => !isHealthyRevision(revision), + ); + if (revisions.length > 0 && unhealthy.length === 0) { + return { + name: appName === target.appName ? "app-revision" : "worker-revision", + status: "pass", + target: `${appName}/active`, + message: `${revisions.length} active revision(s) healthy`, + }; + } + + return { + name: appName === target.appName ? "app-revision" : "worker-revision", + status: "fail", + target: `${appName}/active`, + message: + revisions.length === 0 + ? 
"No active revisions found" + : "One or more active revisions are unhealthy", + details: { revisions }, + }; +} + +async function latestMigrationExecution( + target: SmokeTarget, + command: CommandRunner, +): Promise { + const result = await command([ + "containerapp", + "job", + "execution", + "list", + "--resource-group", + target.resourceGroup, + "--job-name", + target.migrationJobName, + "--query", + "sort_by(@, &properties.startTime)[-1].name", + "--output", + "tsv", + ]); + const executionName = result.stdout.trim(); + if (result.status !== 0 || !executionName) { + throw new SmokeConfigError( + "No migration execution was found for the target job", + ); + } + return executionName; +} + +async function timeCheck( + name: string, + target: string, + run: () => Promise>, +): Promise { + const started = Date.now(); + try { + return { + ...(await run()), + durationMs: Date.now() - started, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { + name, + status: "fail", + target, + durationMs: Date.now() - started, + message, + }; + } +} + +/** + * Fetches `url` with an abort-based timeout and returns the HTTP status plus + * the decoded body: parsed JSON when the body is valid JSON, `null` when it is + * empty, otherwise the raw text truncated to 2000 characters. + */ +async function defaultHttpRunner( + url: string, + timeoutMs: number, +): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + try { + const response = await fetch(url, { signal: controller.signal }); + const text = await response.text(); + let body: unknown = null; + if (text) { + try { + body = JSON.parse(text); + } catch { + /* A non-JSON body (such as an HTML error page) must not throw here: the caller still needs the HTTP status to report the failed health check. */ + body = text.slice(0, 2000); + } + } + return { status: response.status, body }; + } finally { + clearTimeout(timeout); + } +} + +async function defaultCommandRunner(args: string[]): Promise { + const result = spawnSync("az", args, { + encoding: "utf8", + maxBuffer: 1024 * 1024 * 10, + }); + return { + status: result.status ?? 1, + stdout: result.stdout ?? "", + stderr: result.stderr ?? result.error?.message ?? 
"", + }; +} + +/** + * Parses `--flag value` pairs plus the boolean `--json` switch. + * A bare `--` separator is ignored so that `pnpm run smoke:azure -- --flag value` + * works when the package manager forwards the separator to the script. + * + * @throws SmokeConfigError on a positional argument or a flag without a value. + */ +function parseArgs(argv: string[]): ParsedArgs { + const values: Record = {}; + let json = false; + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (arg === "--") { + /* Option separator forwarded by the package manager; it carries no setting. */ + continue; + } + + if (arg === "--json") { + json = true; + continue; + } + + if (!arg.startsWith("--")) { + throw new SmokeConfigError(`Unexpected argument: ${arg}`); + } + + const key = arg.slice(2); + const next = argv[index + 1]; + if (!next || next.startsWith("--")) { + throw new SmokeConfigError(`Missing value for ${arg}`); + } + values[key] = next; + index += 1; + } + return { values, json }; +} + +function requireValue(value: string | undefined, name: string): string { + if (!value?.trim()) { + throw new SmokeConfigError(`Missing required smoke setting: ${name}`); + } + return value.trim(); +} + +function parseJsonArray(value: string): unknown[] { + const parsed = JSON.parse(value || "[]") as unknown; + return Array.isArray(parsed) ? parsed : []; +} + +function isHealthyRevision(revision: unknown): boolean { + if (!revision || typeof revision !== "object") { + return false; + } + const properties = "properties" in revision ? 
revision.properties : revision; + if (!properties || typeof properties !== "object") { + return false; + } + const values = Object.values(properties as Record).map( + (value) => String(value), + ); + return ( + values.some((value) => /running|healthy/i.test(value)) && + !values.some((value) => /failed|degraded|unhealthy/i.test(value)) + ); +} + +if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) { + void main().then((exitCode) => { + process.exit(exitCode); + }); +} diff --git a/scripts/run-azure-deploy-smoke.mjs b/scripts/run-azure-deploy-smoke.mjs new file mode 100644 index 0000000..90f5ca4 --- /dev/null +++ b/scripts/run-azure-deploy-smoke.mjs @@ -0,0 +1,23 @@ +import { spawnSync } from "node:child_process"; +import { fileURLToPath } from "node:url"; +import path from "node:path"; + +const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const tsxCli = path.join(root, "node_modules", "tsx", "dist", "cli.mjs"); +const smokeScript = path.join(root, "scripts", "azure-deploy-smoke.ts"); + +const result = spawnSync( + process.execPath, + [tsxCli, smokeScript, ...process.argv.slice(2)], + { + cwd: root, + stdio: "inherit", + env: process.env, + }, +); + +if (result.error) { + throw result.error; +} + +process.exit(result.status ?? 1); diff --git a/specs/018-opentofu-azure-infra/quickstart.md b/specs/018-opentofu-azure-infra/quickstart.md index a39e61a..903ca58 100644 --- a/specs/018-opentofu-azure-infra/quickstart.md +++ b/specs/018-opentofu-azure-infra/quickstart.md @@ -67,6 +67,22 @@ Trigger the GitHub Actions `deploy-azure.yml` workflow (or run the equivalent st From the outputs, open the Log Analytics workspace / Application Insights to see app logs, worker logs, migration Job results, and revision health (US4, SC-006). +## Step 6 — Smoke verify the deployment + +Run the deploy smoke check after promotion to confirm the app endpoint, migration execution, app revision, and worker revision are all healthy. 
The GitHub `Deploy Azure` workflow runs this automatically after app and worker promotion. Operators can also run it locally: + +```bash +pnpm run smoke:azure -- \ + --environment dev \ + --app-endpoint "$(tofu output -raw app_endpoint)" \ + --resource-group "$(tofu output -raw resource_group_name)" \ + --app-name "$(tofu output -raw app_container_app_name)" \ + --worker-name "$(tofu output -raw worker_container_app_name)" \ + --migration-job-name "$(tofu output -raw migration_job_name)" +``` + +See `docs/azure-deploy-smoke.md` for JSON output and failure troubleshooting. + ## Outputs you get (FR-011) `app_endpoint`, `registry_login_server`, `database_host`, `key_vault_uri`, `log_analytics_workspace_id`, `app_insights_connection_string` (sensitive), `deployment_identity_client_id`, plus the Container App / Job names used for promotion. See outputs-contract.md. diff --git a/specs/020-deploy-smoke-verification/checklists/requirements.md b/specs/020-deploy-smoke-verification/checklists/requirements.md new file mode 100644 index 0000000..7670282 --- /dev/null +++ b/specs/020-deploy-smoke-verification/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: Deploy Smoke Verification + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-06-11 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + 
+## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Initial validation passed. The specification intentionally avoids choosing CLI/script/workflow mechanics until planning. diff --git a/specs/020-deploy-smoke-verification/clarify.md b/specs/020-deploy-smoke-verification/clarify.md new file mode 100644 index 0000000..01334fb --- /dev/null +++ b/specs/020-deploy-smoke-verification/clarify.md @@ -0,0 +1,9 @@ +# Clarifications: Deploy Smoke Verification + +## Session 2026-06-11 + +1. Scope: Verification targets Azure deployments only. +2. Health endpoint: Use the existing deployed app health endpoint and support configured base paths. +3. Runtime checks: Treat migration, app revision, and worker revision checks as required and fail closed. +4. Worker readiness: Infer readiness from Container App revision/runtime state rather than enqueueing a real background job. +5. Evidence: Produce sanitized human output by default and JSON output on request for CI/tooling. diff --git a/specs/020-deploy-smoke-verification/contracts/smoke-cli.md b/specs/020-deploy-smoke-verification/contracts/smoke-cli.md new file mode 100644 index 0000000..cc2edb7 --- /dev/null +++ b/specs/020-deploy-smoke-verification/contracts/smoke-cli.md @@ -0,0 +1,71 @@ +# Contract: Azure Deploy Smoke CLI + +## Command + +```bash +pnpm run smoke:azure -- --environment dev --app-endpoint https://example.com/starter --resource-group rg-app-dev --app-name ca-app-dev --worker-name ca-worker-dev --migration-job-name job-migrate-dev +``` + +The command may also read values from environment variables for GitHub Actions integration. 
+ +## Inputs + +| CLI flag | Environment variable | Required | Description | +| ----------------------- | --------------------------- | -------- | --------------------------------------------------------- | +| `--environment` | `SMOKE_ENVIRONMENT` | Yes | Target environment label. | +| `--app-endpoint` | `SMOKE_APP_ENDPOINT` | Yes | Public app endpoint, including base path when configured. | +| `--resource-group` | `SMOKE_RESOURCE_GROUP` | Yes | Azure resource group. | +| `--app-name` | `SMOKE_APP_NAME` | Yes | Web Container App name. | +| `--worker-name` | `SMOKE_WORKER_NAME` | Yes | Worker Container App name. | +| `--migration-job-name` | `SMOKE_MIGRATION_JOB_NAME` | Yes | Migration Container Apps Job name. | +| `--migration-execution` | `SMOKE_MIGRATION_EXECUTION` | No | Specific migration execution to verify. | +| `--timeout-seconds` | `SMOKE_TIMEOUT_SECONDS` | No | Overall per-check timeout; default 120. | +| `--json` | `SMOKE_JSON` | No | Print only machine-readable JSON when true. | + +## Checks + +1. Application health: `GET /api/health` must return HTTP 200 with `status: "ok"`. +2. Migration job: latest or supplied migration execution must be `Succeeded`. +3. App revision: active web Container App revision must report healthy/running state. +4. Worker revision: latest worker revision must report healthy/running state. + +## Exit Codes + +| Code | Meaning | +| ---- | ----------------------------------------- | +| `0` | All required smoke checks passed. | +| `1` | One or more required smoke checks failed. | +| `2` | Configuration or invocation was invalid. 
| + +## Output + +Human output: + +```text +Azure deployment smoke: dev +PASS app-health https://example.com/starter/api/health +PASS migration job-migrate-dev/execution-name +PASS app-revision ca-app-dev/latest +PASS worker-revision ca-worker-dev/latest +Result: PASS +``` + +JSON output: + +```json +{ + "environment": "dev", + "status": "pass", + "checks": [ + { + "name": "app-health", + "status": "pass", + "target": "https://example.com/starter/api/health", + "durationMs": 120, + "message": "Health endpoint returned ok" + } + ] +} +``` + +All output must be sanitized before printing. diff --git a/specs/020-deploy-smoke-verification/data-model.md b/specs/020-deploy-smoke-verification/data-model.md new file mode 100644 index 0000000..4b1096a --- /dev/null +++ b/specs/020-deploy-smoke-verification/data-model.md @@ -0,0 +1,58 @@ +# Data Model: Deploy Smoke Verification + +## SmokeTarget + +Represents the deployment environment selected by the operator. + +Fields: + +- `environment`: `dev`, `staging`, `prod`, or another explicitly supplied environment label. +- `appEndpoint`: Public application endpoint, including base path when configured. +- `resourceGroup`: Azure resource group containing runtime resources. +- `appName`: Web Container App name. +- `workerName`: Worker Container App name. +- `migrationJobName`: Migration Container Apps Job name. +- `migrationExecutionName`: Optional execution name from the deployment workflow. +- `timeoutSeconds`: Maximum time allowed for health requests and polling. + +Validation: + +- Required fields must be non-empty. +- `appEndpoint` must be an absolute HTTP or HTTPS URL. +- Timeout must be positive and bounded. + +## SmokeCheck + +Represents one required verification. + +Fields: + +- `name`: Stable check identifier. +- `status`: `pass` or `fail`. +- `target`: Sanitized target that was checked. +- `durationMs`: Runtime duration. +- `message`: Human-readable result. +- `details`: Optional sanitized diagnostic fields. 
+ +Validation: + +- Failed checks must include a message. +- Details must be sanitized before output. + +## SmokeReport + +Represents the complete smoke verification result. + +Fields: + +- `environment`: Target environment label. +- `startedAt`: ISO timestamp. +- `finishedAt`: ISO timestamp. +- `status`: `pass` or `fail`. +- `checks`: Ordered list of `SmokeCheck`. + +Validation: + +- Overall status is `fail` if any required check failed. +- Reports must be serializable as JSON. +- Reports must not include raw secrets or tokens. diff --git a/specs/020-deploy-smoke-verification/plan.md b/specs/020-deploy-smoke-verification/plan.md new file mode 100644 index 0000000..fa21a27 --- /dev/null +++ b/specs/020-deploy-smoke-verification/plan.md @@ -0,0 +1,86 @@ +# Implementation Plan: Deploy Smoke Verification + +**Branch**: `020-deploy-smoke-verification` | **Date**: 2026-06-11 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/020-deploy-smoke-verification/spec.md` + +**Required First Step**: Read `/CONTINUE.md` before planning or implementation so the current handoff context, open risks, and recommended next actions are carried forward. + +## Summary + +Add a repeatable Azure deployment smoke verifier that checks the deployed app health endpoint, the migration job execution result, and Container App revision health for both app and worker. Implement it as a small Node/TypeScript command with injectable command/http runners for deterministic tests, then wire it into the Azure deployment workflow after revision promotion and document local usage. 
+ +## Technical Context + +**Language/Version**: TypeScript 5.9 on Node.js via the existing `tsx` dev dependency +**Primary Dependencies**: Node built-ins, existing `tsx`, Azure CLI available in deployment runners +**Storage**: No new storage; smoke evidence remains command output and GitHub step summary +**Testing**: Vitest unit/integration tests under `tests/unit` and `tests/integration` +**Target Platform**: Local operator machines and GitHub Actions Ubuntu runners after Azure deployment +**Project Type**: Web application with deployment automation and infrastructure scripts +**Performance Goals**: Healthy smoke verification completes in under 2 minutes in normal deployment conditions +**Constraints**: No secrets in output; fail closed on missing configuration or failed checks; no new runtime dependency +**Scale/Scope**: One selected environment per invocation; validates one web app, one worker, and one migration job + +## Constitution Check + +_GATE: Must pass before Phase 0 research. Re-check after Phase 1 design._ + +- Simplicity First: PASS. A single smoke command and workflow step are enough; no service or database is added. +- Test Coverage: PASS. Tasks include tests for every user story and mock external Azure/http interactions. +- Duplication Control: PASS. Shared result and redaction helpers keep repeated smoke formatting low. +- Incremental Delivery: PASS. US1 endpoint smoke is the MVP, with runtime checks and reporting layered after. +- Spec Sequencing And Completion: PASS. `ACTIVE_SPECS.md` had no older open specs before starting 020. +- Continuity And Handoff: PASS. `CONTINUE.md` was read and will be refreshed after material changes. +- Azure OpenAI Integration: PASS. No LLM functionality is introduced. +- Web Application Standards: PASS. The smoke target supports the configured base path and existing health endpoint. +- Internationalization: PASS. No user-facing app UI text is introduced. +- Responsive Design: PASS. No UI is introduced. 
+ +## Project Structure + +### Documentation (this feature) + +```text +specs/020-deploy-smoke-verification/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── smoke-cli.md +└── tasks.md +``` + +### Source Code (repository root) + +```text +scripts/ +├── azure-deploy-smoke.ts +└── run-azure-deploy-smoke.mjs + +tests/ +├── unit/ +│ └── azure-deploy-smoke.test.ts +└── integration/ + └── azure-deploy-smoke-cli.test.ts + +.github/workflows/ +└── deploy-azure.yml + +docs/ +└── azure-deploy-smoke.md +``` + +**Structure Decision**: Use the existing repository-level `scripts/` pattern for validation and automation commands, with tests in the existing Vitest unit/integration folders. Keep workflow integration in the existing Azure deployment workflow and add a short operator document. + +## Complexity Tracking + +No constitution violations require justification. + +## Post-Design Constitution Check + +- Simplicity First: PASS. Design remains one command plus workflow/doc wiring. +- Test Coverage: PASS. Contract includes unit and CLI tests for each user story. +- Duplication Control: PASS. The command owns check/report formatting centrally. +- Incremental Delivery: PASS. Tasks are ordered by independently testable smoke slices. +- Continuity And Handoff: PASS. Continuity updates are included in final tasks. diff --git a/specs/020-deploy-smoke-verification/quickstart.md b/specs/020-deploy-smoke-verification/quickstart.md new file mode 100644 index 0000000..8044ebe --- /dev/null +++ b/specs/020-deploy-smoke-verification/quickstart.md @@ -0,0 +1,45 @@ +# Quickstart: Azure Deployment Smoke Verification + +## Local operator run + +1. Authenticate with Azure CLI. + +```bash +az login +``` + +2. Gather values from `tofu output` or the deployment workflow summary. 
+ +```bash +cd infra/azure +tofu output -raw app_endpoint +tofu output -raw resource_group_name +tofu output -raw app_container_app_name +tofu output -raw worker_container_app_name +tofu output -raw migration_job_name +``` + +3. Run the smoke check. + +```bash +pnpm run smoke:azure -- \ + --environment dev \ + --app-endpoint "$(tofu output -raw app_endpoint)" \ + --resource-group "$(tofu output -raw resource_group_name)" \ + --app-name "$(tofu output -raw app_container_app_name)" \ + --worker-name "$(tofu output -raw worker_container_app_name)" \ + --migration-job-name "$(tofu output -raw migration_job_name)" +``` + +The command exits `0` only when the app health endpoint, migration job, app revision, and worker revision checks all pass. + +## GitHub Actions run + +The Azure deployment workflow runs smoke verification after migration and revision promotion. The workflow summary includes the same pass/fail evidence as local output. + +## Failure handling + +- App health failure: inspect the app endpoint, database health, and recent app logs. +- Migration failure: inspect the named migration execution in Azure Container Apps Jobs. +- Revision failure: inspect active revisions for the named app or worker Container App. +- Configuration failure: confirm the workflow variables and OpenTofu outputs match the target environment. diff --git a/specs/020-deploy-smoke-verification/research.md b/specs/020-deploy-smoke-verification/research.md new file mode 100644 index 0000000..4fa87a5 --- /dev/null +++ b/specs/020-deploy-smoke-verification/research.md @@ -0,0 +1,47 @@ +# Research: Deploy Smoke Verification + +## Decision: Use a TypeScript smoke command in `scripts/` + +**Rationale**: The repository already uses Node scripts for validation and automation. TypeScript keeps parsing, redaction, and report typing testable with Vitest while avoiding a new dependency or shell-only implementation. 
+ +**Alternatives considered**: + +- PowerShell script: familiar locally on Windows, but less natural in GitHub Ubuntu runners and harder to unit test in this repo. +- Bash-only workflow block: simplest for CI, but not reusable locally and harder to validate without Azure. +- New CLI package: unnecessary for the current scope. + +## Decision: Use Azure CLI as the Azure interaction layer + +**Rationale**: The deployment workflow already authenticates with Azure CLI and uses it for Container App and Job operations. Reusing `az` avoids adding SDK packages and keeps local operator prerequisites aligned with the existing quickstart. + +**Alternatives considered**: + +- Azure SDK packages: more structured but adds dependencies and authentication surface. +- OpenTofu outputs only: useful for names, but insufficient for live revision and job state. + +## Decision: Check the existing app health endpoint + +**Rationale**: `/api/health` already reports app and database health. The Azure output `app_endpoint` includes the configured base path, so the smoke command can append `api/health` safely. + +**Alternatives considered**: + +- Root page smoke: proves only rendering/reachability, not database health. +- New health endpoint: unnecessary unless the existing endpoint proves insufficient. + +## Decision: Treat runtime checks as fail-closed required checks + +**Rationale**: Operators need a deployment result they can trust. If migration, worker readiness, or active revision health is unknown or failed, the smoke command should fail. + +**Alternatives considered**: + +- Advisory warnings: easier rollout, but weakens the deployment gate. +- Endpoint-only MVP in CI: misses known deployment failure modes. + +## Decision: Redact output with centralized sensitive-value filtering + +**Rationale**: Smoke reports may include command output and URLs. A central sanitizer lets tests prove token, secret, password, key, and connection-string shaped data is not printed. 
+ +**Alternatives considered**: + +- Trust Azure CLI query selection: helpful but incomplete for errors. +- No machine-readable output: less risk, but loses CI evidence. diff --git a/specs/020-deploy-smoke-verification/spec.md b/specs/020-deploy-smoke-verification/spec.md new file mode 100644 index 0000000..49e7cb8 --- /dev/null +++ b/specs/020-deploy-smoke-verification/spec.md @@ -0,0 +1,104 @@ +# Feature Specification: Deploy Smoke Verification + +**Feature Branch**: `020-deploy-smoke-verification` +**Created**: 2026-06-11 +**Status**: Draft +**Input**: User description: "Add deployment smoke verification for Azure deployments so operators can prove the deployed app, health endpoint, migrations, worker readiness, and container app revision health after deployment." + +> Before drafting or implementing this feature, review `/CONTINUE.md` for the latest handoff context and current recommended next steps. + +## User Scenarios & Testing _(mandatory)_ + +### User Story 1 - Verify The Deployed Application (Priority: P1) + +As an operator, I want one repeatable smoke check that confirms the deployed web application is reachable and healthy after deployment, so I can trust that users can access the release before I mark it successful. + +**Why this priority**: A deployment is not useful if the user-facing application cannot be reached. This is the minimum viable safety check. + +**Independent Test**: Run the smoke verification against a configured deployed environment and confirm it fails when the application endpoint is unavailable and passes when the endpoint returns a healthy result. + +**Acceptance Scenarios**: + +1. **Given** a deployed environment with a reachable application endpoint, **When** the operator runs smoke verification, **Then** the result reports the application endpoint as healthy. +2. 
**Given** a deployed environment with an unreachable or unhealthy application endpoint, **When** the operator runs smoke verification, **Then** the result fails with the checked endpoint and reason. + +--- + +### User Story 2 - Verify Deployment Runtime State (Priority: P2) + +As an operator, I want the same smoke process to confirm migration completion, worker readiness, and active container revision health, so deployment success reflects the complete runtime and not only the web endpoint. + +**Why this priority**: A deployment can look healthy from the web endpoint while background processing, migrations, or active revisions are broken. + +**Independent Test**: Run smoke verification with mocked or test Azure runtime responses and confirm it passes only when migration, worker, and revision states are all acceptable. + +**Acceptance Scenarios**: + +1. **Given** the latest migration completed successfully, the worker is ready, and active revisions are healthy, **When** smoke verification runs, **Then** the deployment runtime state passes. +2. **Given** any migration, worker, or active revision check fails, **When** smoke verification runs, **Then** the overall result fails and identifies the failed runtime check. + +--- + +### User Story 3 - Preserve Operator Evidence (Priority: P3) + +As an operator, I want a concise smoke report in local and CI output, so failures are easy to diagnose and successful deployments have auditable evidence. + +**Why this priority**: Verification is more useful when the result can be shared, reviewed, and traced after the deployment finishes. + +**Independent Test**: Run smoke verification in success and failure modes and confirm the generated output contains environment identity, checked targets, pass/fail statuses, and remediation-oriented failure details. + +**Acceptance Scenarios**: + +1. 
**Given** all checks pass, **When** smoke verification completes, **Then** the operator sees a concise success summary including environment, endpoint, and runtime checks. +2. **Given** one or more checks fail, **When** smoke verification completes, **Then** the operator sees a failure summary that names each failed check and preserves enough context for CI logs or local troubleshooting. + +### Edge Cases + +- The application is deployed under a non-root base path. +- The application endpoint returns a redirect, timeout, authentication challenge, or non-health response. +- Azure credentials are missing, expired, or lack access to the target resource group. +- A migration job has no recent execution for the deployed revision. +- Multiple active container revisions exist during rollout. +- The worker is intentionally scaled to zero but has a healthy latest revision. +- The operator runs the smoke check outside CI with manually supplied environment values. +- Secret values, connection strings, and tokens must never appear in smoke output. + +## Requirements _(mandatory)_ + +### Functional Requirements + +- **FR-001**: System MUST provide a repeatable smoke verification entry point that operators can run locally or from deployment automation. +- **FR-002**: System MUST allow operators to select the target environment without editing source files. +- **FR-003**: System MUST verify the deployed application endpoint and report whether the expected health response is reachable within a bounded time. +- **FR-004**: System MUST support applications served under a configured base path when constructing the health check target. +- **FR-005**: System MUST verify that the latest relevant migration completed successfully before the deployment is considered healthy. +- **FR-006**: System MUST verify worker readiness using deployment/runtime state that does not require invoking user workloads. 
+- **FR-007**: System MUST verify active container application revision health for the deployed web application and worker. +- **FR-008**: System MUST fail the smoke verification when any required check fails. +- **FR-009**: System MUST produce a concise machine-readable result suitable for CI and a human-readable summary suitable for local use. +- **FR-010**: System MUST redact secrets and sensitive runtime values from all smoke output. +- **FR-011**: System MUST document how to run smoke verification after Azure deployment. +- **FR-012**: System MUST include automated tests for success, failure, timeout, missing-configuration, and redaction behavior. + +### Key Entities + +- **Smoke Target**: The selected deployment environment and resource identifiers needed to verify it. +- **Smoke Check**: One verification unit with a name, status, checked target, duration, and failure reason when applicable. +- **Smoke Report**: The complete result containing environment identity, check outcomes, timestamps, and sanitized diagnostic details. + +## Success Criteria _(mandatory)_ + +### Measurable Outcomes + +- **SC-001**: Operators can run smoke verification for a configured environment with one command and receive a pass/fail result in under 2 minutes for normal healthy deployments. +- **SC-002**: 100% of failed required checks cause a non-success result and name the failed check. +- **SC-003**: Successful runs include at least the application endpoint, migration, worker, and revision checks in the final summary. +- **SC-004**: Smoke output contains zero raw secret values in automated redaction tests. +- **SC-005**: Deployment documentation lets a new operator run the smoke check without reading source code. + +## Assumptions + +- Azure deployment remains the target for this feature. +- The existing app health endpoint or an equivalent route can be used for the web reachability check. +- CI has access to the same Azure identity already used by deployment automation. 
+- Worker readiness can be inferred from Container App revision/runtime state rather than by pushing a real background job. diff --git a/specs/020-deploy-smoke-verification/tasks.md b/specs/020-deploy-smoke-verification/tasks.md new file mode 100644 index 0000000..e1f68d0 --- /dev/null +++ b/specs/020-deploy-smoke-verification/tasks.md @@ -0,0 +1,134 @@ +# Tasks: Deploy Smoke Verification + +**Input**: Design documents from `/specs/020-deploy-smoke-verification/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/ +**Required Context**: Review `/CONTINUE.md` before task execution and update `CONTINUE.md` plus `CONTINUE_LOG.md` when project state materially changes. + +**Tests**: Required by the project constitution for every user story. + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing of each story. + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Create the smoke command shell and package entry point. + +- [x] T001 Add `smoke:azure` script to `package.json` +- [x] T002 Create CLI wrapper in `scripts/run-azure-deploy-smoke.mjs` +- [x] T003 Create typed smoke command module skeleton in `scripts/azure-deploy-smoke.ts` + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Shared parsing, execution, reporting, and redaction used by all smoke stories. + +- [x] T004 Implement CLI argument and environment parsing in `scripts/azure-deploy-smoke.ts` +- [x] T005 Implement sanitized report types and redaction helpers in `scripts/azure-deploy-smoke.ts` +- [x] T006 Implement injectable HTTP and Azure command runners in `scripts/azure-deploy-smoke.ts` + +**Checkpoint**: Foundation ready; user story implementation can begin. + +--- + +## Phase 3: User Story 1 - Verify The Deployed Application (Priority: P1) MVP + +**Goal**: Operators can verify that the deployed app health endpoint is reachable and healthy. 
+ +**Independent Test**: Mock the health endpoint and confirm the smoke command passes on `status: ok` and fails on unavailable/degraded responses. + +### Tests for User Story 1 + +- [x] T007 [P] [US1] Add app health smoke unit tests in `tests/unit/azure-deploy-smoke.test.ts` + +### Implementation for User Story 1 + +- [x] T008 [US1] Implement base-path-aware app health URL construction in `scripts/azure-deploy-smoke.ts` +- [x] T009 [US1] Implement app health smoke check in `scripts/azure-deploy-smoke.ts` +- [x] T010 [US1] Run `pnpm test -- tests/unit/azure-deploy-smoke.test.ts` + +**Checkpoint**: User Story 1 is independently functional and testable. + +--- + +## Phase 4: User Story 2 - Verify Deployment Runtime State (Priority: P2) + +**Goal**: Operators can verify migration completion, app revision health, and worker revision health. + +**Independent Test**: Mock Azure CLI responses and confirm runtime smoke passes only when migration and both revision checks are healthy. + +### Tests for User Story 2 + +- [x] T011 [P] [US2] Add migration and revision smoke unit tests in `tests/unit/azure-deploy-smoke.test.ts` + +### Implementation for User Story 2 + +- [x] T012 [US2] Implement migration job execution lookup and success validation in `scripts/azure-deploy-smoke.ts` +- [x] T013 [US2] Implement Container App revision health validation in `scripts/azure-deploy-smoke.ts` +- [x] T014 [US2] Run `pnpm test -- tests/unit/azure-deploy-smoke.test.ts` + +**Checkpoint**: User Stories 1 and 2 work independently. + +--- + +## Phase 5: User Story 3 - Preserve Operator Evidence (Priority: P3) + +**Goal**: Operators and CI receive concise, sanitized, auditable smoke output. + +**Independent Test**: Run the CLI in mocked success and failure modes and confirm exit codes plus sanitized human/JSON output. 
+ +### Tests for User Story 3 + +- [x] T015 [P] [US3] Add CLI output and exit-code tests in `tests/integration/azure-deploy-smoke-cli.test.ts` + +### Implementation for User Story 3 + +- [x] T016 [US3] Implement human and JSON output rendering in `scripts/azure-deploy-smoke.ts` +- [x] T017 [US3] Wire smoke verification into `.github/workflows/deploy-azure.yml` +- [x] T018 [US3] Document local and CI usage in `docs/azure-deploy-smoke.md` and update `specs/018-opentofu-azure-infra/quickstart.md` +- [x] T019 [US3] Run `pnpm test -- tests/unit/azure-deploy-smoke.test.ts tests/integration/azure-deploy-smoke-cli.test.ts` + +**Checkpoint**: All user stories are independently functional. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Validation, tracking, and handoff. + +- [x] T020 Refresh spec overview in `specs/OVERVIEW.md` +- [x] T021 Update `ACTIVE_SPECS.md`, `CONTINUE.md`, and `CONTINUE_LOG.md` +- [x] T022 Run `.\validate.ps1 all` +- [x] T023 Review git diff and prepare commit + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- Setup (Phase 1): No dependencies. +- Foundational (Phase 2): Depends on setup completion and blocks all user stories. +- User Story 1 (Phase 3): Depends on foundational phase. +- User Story 2 (Phase 4): Depends on foundational phase and can be tested without US1, but will normally follow MVP validation. +- User Story 3 (Phase 5): Depends on reporting primitives from foundational phase and check results from US1/US2. +- Polish (Phase 6): Depends on completed target user stories. + +### Parallel Opportunities + +- T007, T011, and T015 can be drafted independently once the skeleton exports are known. +- Documentation T018 can proceed after the CLI contract stabilizes. + +## Implementation Strategy + +### MVP First + +1. Complete setup and foundational tasks. +2. Complete US1 app health smoke check. +3. Validate US1 with focused tests before adding Azure runtime checks. + +### Incremental Delivery + +1. 
Add app health smoke. +2. Add migration/revision runtime smoke. +3. Add CI wiring and operator evidence. +4. Run full validation before commit. diff --git a/specs/OVERVIEW.md b/specs/OVERVIEW.md index dd91ad3..c54e948 100644 --- a/specs/OVERVIEW.md +++ b/specs/OVERVIEW.md @@ -30,6 +30,7 @@ Purpose: Track the status of all planned features, their implementation progress | 017 | DeepSec Remediation | Fully Implemented | - | Large | Review, commit, and propagate the finished feature | | 018 | OpenTofu Azure Infrastructure | Fully Implemented | - | Large | Review, commit, and propagate the finished feature | | 019 | Logging Standardization | Fully Implemented | - | Large | Review, commit, and propagate the finished feature | +| 020 | Deploy Smoke Verification | Fully Implemented | - | Large | Review, commit, and propagate the finished feature | ## Implementation Roadmap @@ -45,6 +46,7 @@ Purpose: Track the status of all planned features, their implementation progress - 017 DeepSec Remediation: fully implemented - 018 OpenTofu Azure Infrastructure: fully implemented - 019 Logging Standardization: fully implemented +- 020 Deploy Smoke Verification: fully implemented ### Begin Immediately diff --git a/tests/integration/azure-deploy-smoke-cli.test.ts b/tests/integration/azure-deploy-smoke-cli.test.ts new file mode 100644 index 0000000..bd5a33c --- /dev/null +++ b/tests/integration/azure-deploy-smoke-cli.test.ts @@ -0,0 +1,119 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + main, + type CommandRunner, + type HttpRunner, +} from "../../scripts/azure-deploy-smoke"; + +const healthyRevision = JSON.stringify([ + { + properties: { + active: true, + healthState: "Healthy", + runningState: "Running", + }, + }, +]); + +const baseArgs = [ + "--environment", + "dev", + "--app-endpoint", + "https://example.test/starter", + "--resource-group", + "rg-dev", + "--app-name", + "app-dev", + "--worker-name", + "worker-dev", + "--migration-job-name", + 
"migration-dev", + "--migration-execution", + "migration-execution", +]; + +function healthyRunners(): { http: HttpRunner; command: CommandRunner } { + return { + http: vi.fn(async () => ({ status: 200, body: { status: "ok" } })), + command: vi.fn(async (args: string[]) => { + const joined = args.join(" "); + if (joined.includes("execution show")) { + return { status: 0, stdout: "Succeeded\n", stderr: "" }; + } + return { status: 0, stdout: healthyRevision, stderr: "" }; + }), + }; +} + +describe("azure deploy smoke CLI", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("prints human output and exits zero when all checks pass", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => undefined); + const error = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); + + const exitCode = await main(baseArgs, process.env, healthyRunners()); + + expect(exitCode).toBe(0); + expect(log.mock.calls.join("\n")).toContain("Result: PASS"); + expect(log.mock.calls.join("\n")).toContain("PASS app-health"); + expect(error).not.toHaveBeenCalled(); + }); + + it("prints JSON output and exits one when a check fails", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => undefined); + + const exitCode = await main([...baseArgs, "--json"], process.env, { + ...healthyRunners(), + http: async () => ({ status: 503, body: { status: "degraded" } }), + }); + + expect(exitCode).toBe(1); + const parsed = JSON.parse(String(log.mock.calls[0]?.[0])) as { + status: string; + }; + expect(parsed.status).toBe("fail"); + }); + + it("exits two and reports configuration errors", async () => { + const error = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); + + const exitCode = await main([], process.env, healthyRunners()); + + expect(exitCode).toBe(2); + expect(error.mock.calls.join("\n")).toContain( + "Missing required smoke setting", + ); + }); + + it("does not print raw sensitive values from failed Azure 
output", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => undefined); + const command: CommandRunner = vi.fn(async (args: string[]) => { + if (args.join(" ").includes("execution show")) { + return { + status: 1, + stdout: "", + stderr: "token=super-secret-token password=hunter2", + }; + } + return { status: 0, stdout: healthyRevision, stderr: "" }; + }); + + const exitCode = await main(baseArgs, process.env, { + http: async () => ({ status: 200, body: { status: "ok" } }), + command, + }); + + expect(exitCode).toBe(1); + const output = log.mock.calls.join("\n"); + expect(output).not.toContain("super-secret-token"); + expect(output).not.toContain("hunter2"); + expect(output).toContain("[REDACTED]"); + }); +}); diff --git a/tests/unit/azure-deploy-smoke.test.ts b/tests/unit/azure-deploy-smoke.test.ts new file mode 100644 index 0000000..46864de --- /dev/null +++ b/tests/unit/azure-deploy-smoke.test.ts @@ -0,0 +1,228 @@ +import { describe, expect, it, vi } from "vitest"; +import { + buildHealthUrl, + parseSmokeTarget, + renderHumanReport, + runSmoke, + sanitize, + SmokeConfigError, + type CommandRunner, + type HttpRunner, + type SmokeTarget, +} from "../../scripts/azure-deploy-smoke"; + +const healthyRevision = JSON.stringify([ + { + name: "revision-a", + properties: { + active: true, + healthState: "Healthy", + runningState: "Running", + }, + }, +]); + +function target(overrides: Partial<SmokeTarget> = {}): SmokeTarget { + return { + environment: "dev", + appEndpoint: "https://example.test/starter", + resourceGroup: "rg-dev", + appName: "app-dev", + workerName: "worker-dev", + migrationJobName: "migration-dev", + migrationExecutionName: "migration-execution", + timeoutSeconds: 120, + json: false, + ...overrides, + }; +} + +function commandRunner( + overrides: Record< + string, + { status?: number; stdout?: string; stderr?: string } + > = {}, +): CommandRunner { + return vi.fn(async (args: string[]) => { + const joined = args.join(" "); + const match = 
Object.entries(overrides).find(([key]) => + joined.includes(key), + ); + const response = match?.[1] ?? {}; + if (joined.includes("execution show")) { + return { + status: response.status ?? 0, + stdout: response.stdout ?? "Succeeded\n", + stderr: response.stderr ?? "", + }; + } + return { + status: response.status ?? 0, + stdout: response.stdout ?? healthyRevision, + stderr: response.stderr ?? "", + }; + }); +} + +describe("azure deploy smoke", () => { + it("builds the app health URL with the configured base path", () => { + expect(buildHealthUrl("https://example.test/starter/")).toBe( + "https://example.test/starter/api/health", + ); + }); + + it("requires valid smoke configuration", () => { + expect(() => parseSmokeTarget([], process.env)).toThrow(SmokeConfigError); + expect(() => + parseSmokeTarget( + [ + "--environment", + "dev", + "--app-endpoint", + "ftp://example.test", + "--resource-group", + "rg", + "--app-name", + "app", + "--worker-name", + "worker", + "--migration-job-name", + "migration", + ], + process.env, + ), + ).toThrow("app-endpoint must be an absolute HTTP or HTTPS URL"); + }); + + it("passes when the app health endpoint and runtime checks are healthy", async () => { + const http: HttpRunner = vi.fn(async () => ({ + status: 200, + body: { status: "ok" }, + })); + + const report = await runSmoke(target(), { + http, + command: commandRunner(), + }); + + expect(report.status).toBe("pass"); + expect(report.checks.map((check) => [check.name, check.status])).toEqual([ + ["app-health", "pass"], + ["migration", "pass"], + ["app-revision", "pass"], + ["worker-revision", "pass"], + ]); + expect(http).toHaveBeenCalledWith( + "https://example.test/starter/api/health", + 120000, + ); + }); + + it("fails the app health check when the endpoint is degraded", async () => { + const http: HttpRunner = vi.fn(async () => ({ + status: 503, + body: { status: "degraded" }, + })); + + const report = await runSmoke(target(), { + http, + command: commandRunner(), + }); + 
+ expect(report.status).toBe("fail"); + expect(report.checks[0]).toMatchObject({ + name: "app-health", + status: "fail", + message: "Health endpoint returned 503", + }); + }); + + it("looks up the latest migration execution when one is not supplied", async () => { + const command = commandRunner({ + "execution list": { stdout: "latest-execution\n" }, + }); + + const report = await runSmoke( + target({ migrationExecutionName: undefined }), + { + http: async () => ({ status: 200, body: { status: "ok" } }), + command, + }, + ); + + expect(report.status).toBe("pass"); + expect(command).toHaveBeenCalledWith( + expect.arrayContaining(["execution", "list"]), + ); + expect(command).toHaveBeenCalledWith( + expect.arrayContaining(["latest-execution"]), + ); + }); + + it("fails when the migration execution did not succeed", async () => { + const report = await runSmoke(target(), { + http: async () => ({ status: 200, body: { status: "ok" } }), + command: commandRunner({ + "execution show": { stdout: "Failed\n" }, + }), + }); + + expect(report.status).toBe("fail"); + expect( + report.checks.find((check) => check.name === "migration"), + ).toMatchObject({ + status: "fail", + message: "Migration execution status is Failed", + }); + }); + + it("fails when an active revision is unhealthy", async () => { + const report = await runSmoke(target(), { + http: async () => ({ status: 200, body: { status: "ok" } }), + command: commandRunner({ + "revision list --resource-group rg-dev --name worker-dev": { + stdout: JSON.stringify([ + { + properties: { + active: true, + healthState: "Unhealthy", + runningState: "Failed", + }, + }, + ]), + }, + }), + }); + + expect(report.status).toBe("fail"); + expect( + report.checks.find((check) => check.name === "worker-revision"), + ).toMatchObject({ + status: "fail", + message: "One or more active revisions are unhealthy", + }); + }); + + it("redacts sensitive values in reports", () => { + const sanitized = sanitize({ + token: "abc123", + url: 
"https://example.test?token=abc123&name=value", + message: "Bearer abc.def.ghi", + }); + + expect(JSON.stringify(sanitized)).not.toContain("abc123"); + expect(JSON.stringify(sanitized)).not.toContain("abc.def.ghi"); + expect(JSON.stringify(sanitized)).toContain("[REDACTED]"); + }); + + it("renders concise human output", async () => { + const report = await runSmoke(target(), { + http: async () => ({ status: 200, body: { status: "ok" } }), + command: commandRunner(), + }); + + expect(renderHumanReport(report)).toContain("Azure deployment smoke: dev"); + expect(renderHumanReport(report)).toContain("PASS app-health"); + expect(renderHumanReport(report)).toContain("Result: PASS"); + }); +}); diff --git a/tests/unit/security/deploy-workflow.test.ts b/tests/unit/security/deploy-workflow.test.ts index db7c64b..b7a53ef 100644 --- a/tests/unit/security/deploy-workflow.test.ts +++ b/tests/unit/security/deploy-workflow.test.ts @@ -22,6 +22,9 @@ describe("Azure deploy workflow contract", () => { expect(workflow).toContain( "actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd", ); + expect(workflow).toContain( + "actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444", + ); expect(workflow).toContain( "opentofu/setup-opentofu@847eaa4afeb791b06daa46e8eafa8b1b68d7cfb4", ); @@ -52,14 +55,18 @@ describe("Azure deploy workflow contract", () => { const provision = stepIndex(workflow, "provision"); const migrate = stepIndex(workflow, "migrate"); const promote = stepIndex(workflow, "promote-app-worker"); + const smoke = stepIndex(workflow, "smoke"); const report = stepIndex(workflow, "report"); expect(validate).toBeLessThan(provision); expect(provision).toBeLessThan(migrate); expect(migrate).toBeLessThan(promote); + expect(promote).toBeLessThan(smoke); + expect(smoke).toBeLessThan(report); expect(promote).toBeLessThan(report); expect(workflow).toContain("az acr repository show-tags"); expect(workflow).toContain("az containerapp job start"); + expect(workflow).toContain("pnpm 
run smoke:azure"); }); it("blocks promotion when migration fails and reports non-promotion", () => {