From f8d6810f373b2ab8afd5d75fd7b7efa3881577e0 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 14:27:18 -0800 Subject: [PATCH 01/60] feat: add phase-2 trusted release consumption platform --- .../_release-workspace-installer-core.yml | 107 ++- .github/workflows/ci.yml | 3 + .../workflows/release-with-windows-gate.yml | 6 + .../workflows/release-workspace-installer.yml | 6 + AGENTS.md | 48 +- README.md | 64 +- .../Install-WorkspaceInstallerFromRelease.ps1 | 728 ++++++++++++++++++ scripts/Test-PolicyContracts.ps1 | 26 + scripts/Test-ReleaseClientContracts.ps1 | 128 +++ scripts/Write-ReleaseManifest.ps1 | 158 ++++ tests/ReleaseClientPolicyContract.Tests.ps1 | 67 ++ tests/ReleaseClientRuntimeContract.Tests.ps1 | 47 ++ tests/ReleaseManifestContract.Tests.ps1 | 51 ++ ...eWithWindowsGateWorkflowContract.Tests.ps1 | 1 + ...orkspaceInstallerReleaseContract.Tests.ps1 | 11 + tests/WorkspaceSurfaceContract.Tests.ps1 | 42 + .../tools/cdev-cli/cli-contract.json | 4 +- .../scripts/Test-PolicyContracts.ps1 | 52 ++ .../workspace-governance.json | 43 ++ workspace-governance.json | 43 ++ 20 files changed, 1628 insertions(+), 7 deletions(-) create mode 100644 scripts/Install-WorkspaceInstallerFromRelease.ps1 create mode 100644 scripts/Test-ReleaseClientContracts.ps1 create mode 100644 scripts/Write-ReleaseManifest.ps1 create mode 100644 tests/ReleaseClientPolicyContract.Tests.ps1 create mode 100644 tests/ReleaseClientRuntimeContract.Tests.ps1 create mode 100644 tests/ReleaseManifestContract.Tests.ps1 diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 897e985..870ad74 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -17,6 +17,11 @@ on: required: false default: false type: boolean + release_channel: + description: Explicit release channel metadata (stable, prerelease, canary). 
+ required: false + default: '' + type: string override_applied: description: Whether controlled gate override was used. required: false @@ -95,6 +100,15 @@ jobs: run: | $ErrorActionPreference = 'Stop' + $releaseChannel = [string]'${{ inputs.release_channel }}' + $isPrereleaseInput = [System.Convert]::ToBoolean([string]'${{ inputs.prerelease }}') + if ([string]::IsNullOrWhiteSpace($releaseChannel)) { + $releaseChannel = if ($isPrereleaseInput) { 'prerelease' } else { 'stable' } + } + if ($releaseChannel -notin @('stable', 'prerelease', 'canary')) { + throw "Unsupported release channel '$releaseChannel'. Expected stable, prerelease, or canary." + } + $assetName = 'lvie-cdev-workspace-installer.exe' $releaseRoot = Join-Path $env:RUNNER_TEMP 'workspace-installer-release' $payloadRoot = Join-Path $releaseRoot 'payload' @@ -169,6 +183,54 @@ jobs: $assetPath = Join-Path $releaseRoot $assetName Copy-Item -LiteralPath $deterministicInstallerPath -Destination $assetPath -Force + + $signatureStatus = 'not_signed' + $signatureSubject = '' + $signatureThumbprint = '' + $signatureTimestampUtc = '' + $codesignPfxB64 = [string]'${{ secrets.WORKSPACE_INSTALLER_CODESIGN_PFX_B64 }}' + $codesignPfxPassword = [string]'${{ secrets.WORKSPACE_INSTALLER_CODESIGN_PFX_PASSWORD }}' + $timestampServer = [string]'${{ vars.WORKSPACE_INSTALLER_TIMESTAMP_SERVER }}' + if ([string]::IsNullOrWhiteSpace($timestampServer)) { + $timestampServer = 'http://timestamp.digicert.com' + } + + if (-not [string]::IsNullOrWhiteSpace($codesignPfxB64) -or -not [string]::IsNullOrWhiteSpace($codesignPfxPassword)) { + if ([string]::IsNullOrWhiteSpace($codesignPfxB64) -or [string]::IsNullOrWhiteSpace($codesignPfxPassword)) { + throw 'Incomplete signing configuration. Configure WORKSPACE_INSTALLER_CODESIGN_PFX_B64 and WORKSPACE_INSTALLER_CODESIGN_PFX_PASSWORD together.' 
+ } + + $pfxPath = Join-Path $releaseRoot 'workspace-installer-signing-cert.pfx' + [System.IO.File]::WriteAllBytes($pfxPath, [Convert]::FromBase64String($codesignPfxB64)) + + $securePassword = ConvertTo-SecureString -String $codesignPfxPassword -AsPlainText -Force + $certificate = Get-PfxCertificate -FilePath $pfxPath -Password $securePassword + if ($null -eq $certificate) { + throw 'Failed to load code-signing certificate from PFX.' + } + + $signResult = Set-AuthenticodeSignature -FilePath $assetPath -Certificate $certificate -HashAlgorithm SHA256 -TimestampServer $timestampServer + if ($null -eq $signResult -or [string]$signResult.Status -ne 'Valid') { + throw "Set-AuthenticodeSignature failed. status=$([string]$signResult.Status)" + } + + $signature = Get-AuthenticodeSignature -FilePath $assetPath + if ([string]$signature.Status -ne 'Valid') { + throw "Signed installer failed signature verification. status=$([string]$signature.Status)" + } + + $signatureStatus = 'signed_valid' + if ($null -ne $signature.SignerCertificate) { + $signatureSubject = [string]$signature.SignerCertificate.Subject + $signatureThumbprint = [string]$signature.SignerCertificate.Thumbprint + } + if ($null -ne $signature.TimeStamperCertificate) { + $signatureTimestampUtc = (Get-Date $signature.TimeStamperCertificate.NotBefore).ToUniversalTime().ToString('o') + } + } else { + Write-Warning 'No code-signing certificate configured. Publishing unsigned installer metadata.' + } + $assetSha = (Get-FileHash -LiteralPath $assetPath -Algorithm SHA256).Hash.ToLowerInvariant() "{0} *{1}" -f $assetSha, $assetName | Set-Content -LiteralPath (Join-Path $releaseRoot "$assetName.sha256") -Encoding ascii @@ -188,15 +250,40 @@ jobs: -OutputPath (Join-Path $provRoot 'provenance-contract-report.json') if ($LASTEXITCODE -ne 0) { throw "Provenance contract validation failed." 
} + $releaseManifestPath = Join-Path $releaseRoot 'release-manifest.json' + & pwsh -NoProfile -File (Join-Path $env:GITHUB_WORKSPACE 'scripts/Write-ReleaseManifest.ps1') ` + -Repository '${{ github.repository }}' ` + -ReleaseTag '${{ inputs.release_tag }}' ` + -Channel $releaseChannel ` + -InstallerPath $assetPath ` + -InstallerSha256 $assetSha ` + -InstallerShaPath (Join-Path $releaseRoot "$assetName.sha256") ` + -SpdxPath (Join-Path $provRoot 'workspace-installer.spdx.json') ` + -SlsaPath (Join-Path $provRoot 'workspace-installer.slsa.json') ` + -ReproducibilityPath (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') ` + -OutputPath $releaseManifestPath ` + -PublishedAtUtc ((Get-Date).ToUniversalTime().ToString('o')) ` + -SignatureStatus $signatureStatus ` + -SignatureSubject $signatureSubject ` + -SignatureThumbprint $signatureThumbprint ` + -SignatureTimestampUtc $signatureTimestampUtc + if ($LASTEXITCODE -ne 0) { throw "Release manifest generation failed." } + $metadataPath = Join-Path $releaseRoot 'workspace-installer-metadata.json' [ordered]@{ asset_name = $assetName asset_path = $assetPath asset_sha256 = $assetSha + release_channel = $releaseChannel install_command = 'lvie-cdev-workspace-installer.exe /S' repro_report = (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') spdx_path = (Join-Path $provRoot 'workspace-installer.spdx.json') slsa_path = (Join-Path $provRoot 'workspace-installer.slsa.json') + release_manifest_name = 'release-manifest.json' + signature_status = $signatureStatus + signature_subject = $signatureSubject + signature_thumbprint = $signatureThumbprint + signature_timestamp_utc = $signatureTimestampUtc } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $metadataPath -Encoding utf8 Copy-Item -LiteralPath (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') -Destination (Join-Path $releaseRoot 'reproducibility-report.json') -Force @@ -216,6 +303,7 @@ jobs: ${{ runner.temp 
}}/workspace-installer-release/reproducibility-report.json ${{ runner.temp }}/workspace-installer-release/workspace-installer.spdx.json ${{ runner.temp }}/workspace-installer-release/workspace-installer.slsa.json + ${{ runner.temp }}/workspace-installer-release/release-manifest.json ${{ runner.temp }}/workspace-installer-release/workspace-installer-metadata.json if-no-files-found: error @@ -240,6 +328,7 @@ jobs: RELEASE_TAG: ${{ inputs.release_tag }} ALLOW_EXISTING_TAG: ${{ inputs.allow_existing_tag }} PRERELEASE: ${{ inputs.prerelease }} + RELEASE_CHANNEL: ${{ inputs.release_channel }} RELEASE_TARGET_SHA: ${{ github.sha }} OVERRIDE_APPLIED: ${{ inputs.override_applied }} OVERRIDE_REASON: ${{ inputs.override_reason }} @@ -258,9 +347,10 @@ jobs: $reproPath = Join-Path $artifactRoot 'reproducibility-report.json' $spdxPath = Join-Path $artifactRoot 'workspace-installer.spdx.json' $slsaPath = Join-Path $artifactRoot 'workspace-installer.slsa.json' + $releaseManifestPath = Join-Path $artifactRoot 'release-manifest.json' $metadataPath = Join-Path $artifactRoot 'workspace-installer-metadata.json' - foreach ($path in @($assetPath, $shaPath, $reproPath, $spdxPath, $slsaPath, $metadataPath)) { + foreach ($path in @($assetPath, $shaPath, $reproPath, $spdxPath, $slsaPath, $releaseManifestPath, $metadataPath)) { if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { throw "Required release artifact missing: $path" } @@ -270,6 +360,13 @@ jobs: $assetName = [string]$metadata.asset_name $assetSha = [string]$metadata.asset_sha256 $installCommand = [string]$metadata.install_command + $releaseChannel = [string]$metadata.release_channel + if ([string]::IsNullOrWhiteSpace($releaseChannel)) { + $releaseChannel = [string]$env:RELEASE_CHANNEL + } + if ([string]::IsNullOrWhiteSpace($releaseChannel)) { + $releaseChannel = if ($prerelease) { 'prerelease' } else { 'stable' } + } if ([string]::IsNullOrWhiteSpace($assetName) -or [string]::IsNullOrWhiteSpace($assetSha)) { throw "Installer 
metadata is missing required fields." } @@ -304,6 +401,10 @@ jobs: "- $(Split-Path -Path $reproPath -Leaf)" "- $(Split-Path -Path $spdxPath -Leaf)" "- $(Split-Path -Path $slsaPath -Leaf)" + "- $(Split-Path -Path $releaseManifestPath -Leaf)" + "" + "Release channel:" + "- $releaseChannel" "" "SHA256:" "- $assetSha" @@ -362,9 +463,9 @@ jobs: } if ($allowExistingTag) { - & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath -R $repo --clobber + & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath $releaseManifestPath -R $repo --clobber } else { - & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath -R $repo + & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath $releaseManifestPath -R $repo } if ($LASTEXITCODE -ne 0) { throw "Failed to upload release assets for '$releaseTag'." diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a42d0c8..649620c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,6 +41,9 @@ jobs: './tests/WorkspaceShaRefreshPrContract.Tests.ps1', './tests/WorkspaceManifestPinRefreshScript.Tests.ps1', './tests/WorkspaceInstallerReleaseContract.Tests.ps1', + './tests/ReleaseManifestContract.Tests.ps1', + './tests/ReleaseClientRuntimeContract.Tests.ps1', + './tests/ReleaseClientPolicyContract.Tests.ps1', './tests/WorkspaceInstallRuntimeContract.Tests.ps1', './tests/Build-WorkspaceBootstrapInstaller.Tests.ps1', './tests/Build-RunnerCliBundleFromManifest.Tests.ps1', diff --git a/.github/workflows/release-with-windows-gate.yml b/.github/workflows/release-with-windows-gate.yml index 03399fc..7d9477d 100644 --- a/.github/workflows/release-with-windows-gate.yml +++ b/.github/workflows/release-with-windows-gate.yml @@ -17,6 +17,11 @@ on: required: false default: false type: boolean + release_channel: + description: Explicit release channel metadata (stable, prerelease, canary). 
Leave blank to derive from prerelease. + required: false + default: '' + type: string allow_gate_override: description: Allow publish even if Windows gate fails (controlled break-glass). required: false @@ -141,6 +146,7 @@ jobs: release_tag: ${{ inputs.release_tag }} allow_existing_tag: ${{ inputs.allow_existing_tag }} prerelease: ${{ inputs.prerelease }} + release_channel: ${{ inputs.release_channel }} override_applied: ${{ fromJSON(needs.gate_policy.outputs.override_applied) }} override_reason: ${{ needs.gate_policy.outputs.override_reason }} override_incident_url: ${{ needs.gate_policy.outputs.override_incident_url }} diff --git a/.github/workflows/release-workspace-installer.yml b/.github/workflows/release-workspace-installer.yml index 65b8092..4d00d8b 100644 --- a/.github/workflows/release-workspace-installer.yml +++ b/.github/workflows/release-workspace-installer.yml @@ -17,6 +17,11 @@ on: required: false default: false type: boolean + release_channel: + description: Explicit release channel metadata (stable, prerelease, canary). Leave blank to derive from prerelease. 
+ required: false + default: '' + type: string permissions: contents: write @@ -29,4 +34,5 @@ jobs: release_tag: ${{ inputs.release_tag }} allow_existing_tag: ${{ inputs.allow_existing_tag }} prerelease: ${{ inputs.prerelease }} + release_channel: ${{ inputs.release_channel }} secrets: inherit diff --git a/AGENTS.md b/AGENTS.md index fcccaf0..dcd0ff5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -17,6 +17,10 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Required CLI command surface (stable tokens): - `repos doctor` - `installer exercise` + - `installer install --mode release` + - `installer upgrade` + - `installer rollback` + - `installer status` - `postactions collect` - `linux deploy-ni` - Linux deploy defaults must stay documented as: @@ -74,13 +78,24 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `.github/workflows/release-workspace-installer.yml` is retained as a dispatch wrapper for diagnostics/fallback and must call `./.github/workflows/_release-workspace-installer-core.yml`. - `.github/workflows/windows-labview-image-gate.yml` is retained as a dispatch wrapper for diagnostics/fallback and must call `./.github/workflows/_windows-labview-image-gate-core.yml`. - Publishing mode is manual dispatch only with explicit semantic tag input (`v<major>.<minor>.<patch>`). +- Release channel metadata is supported via `release_channel` input (`stable`, `prerelease`, `canary`); default is derived from `prerelease`. - Release tags are immutable by default: existing tags must fail publication unless `allow_existing_tag=true` is explicitly set for break-glass recovery. - Release creation must bind tag creation to the exact workflow commit SHA (`github.sha`), not a moving branch target. 
- Keep fork-first mutation rules when preparing release changes: - mutate `origin` (`svelderrainruiz/labview-cdev-surface`) only - open PRs to `LabVIEW-Community-CI-CD/labview-cdev-surface:main` - Do not add push-triggered or scheduled release publishing in this repository. -- Phase-1 release policy is unsigned installer with mandatory SHA256 provenance in release notes. +- Release packaging must publish: + - `lvie-cdev-workspace-installer.exe` + - `lvie-cdev-workspace-installer.exe.sha256` + - `reproducibility-report.json` + - `workspace-installer.spdx.json` + - `workspace-installer.slsa.json` + - `release-manifest.json` +- Installer signing policy is Authenticode dual-mode transition: + - dual-mode start: `2026-03-15T00:00:00Z` + - canary enforce date: `2026-05-15T00:00:00Z` + - stable/prerelease enforce date (`grace_end_utc`): `2026-07-01T00:00:00Z` ## Installer Build Contract - `CI Pipeline` (GitHub-hosted) is the required merge check. @@ -96,6 +111,37 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `Provenance Contract` (SPDX/SLSA generation + hash-link validation). - Keep default-branch required checks unchanged until branch-protection contract is intentionally updated. +## Release Client Runtime Contract +- `scripts/Install-WorkspaceInstallerFromRelease.ps1` is the canonical release-client runtime fallback for install/upgrade/rollback operations. +- Runtime modes must remain: + - `Install` + - `Upgrade` + - `Rollback` + - `Status` + - `ValidatePolicy` +- Runtime must enforce policy allowlist on release source repositories before download. 
+- Runtime failure model must preserve deterministic reason codes: + - `source_blocked` + - `asset_missing` + - `hash_mismatch` + - `signature_missing` + - `signature_invalid` + - `provenance_invalid` + - `installer_exit_nonzero` + - `install_report_missing` +- Runtime must verify `release-manifest.json`, installer SHA256, Authenticode status (with channel-aware enforcement), SPDX/SLSA linkage, and installer smoke report presence. +- Release-client state/report policy files: + - `C:\dev\workspace-governance\release-policy.json` + - `C:\dev\artifacts\workspace-release-state.json` + - `C:\dev\artifacts\workspace-release-client-latest.json` +- Allowed installer release repositories default to: + - `LabVIEW-Community-CI-CD/labview-cdev-surface` + - `svelderrainruiz/labview-cdev-surface` +- cdev-cli fork/upstream full-sync alignment metadata is required in `installer_contract.release_client.cdev_cli_sync`: + - primary repo: `svelderrainruiz/labview-cdev-cli` + - mirror repo: `LabVIEW-Community-CI-CD/labview-cdev-cli` + - strategy: `fork-and-upstream-full-sync` + ## Installer Runtime Gate Contract - Installer runtime (`scripts/Install-WorkspaceFromManifest.ps1`) must fail fast if bundled `runner-cli` integrity checks fail. - Installer runtime must enforce LabVIEW 2020 capability gates in this order: diff --git a/README.md b/README.md index 41adfe7..27200c3 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,9 @@ pwsh -NoProfile -File C:\dev\tools\cdev-cli\win-x64\cdev-cli\scripts\Invoke-Cdev Core commands: - `repos doctor` - `installer exercise` +- `installer install --mode release` +- `installer upgrade` +- `installer rollback` - `postactions collect` - `linux deploy-ni --docker-context desktop-linux --image nationalinstruments/labview:latest-linux` @@ -219,11 +222,13 @@ Controlled override (exception only): Release packaging still: - Builds `lvie-cdev-workspace-installer.exe`. +- Signs installer when signing certificate secrets are configured. - Computes SHA256. 
- Runs determinism gates and fails on hash drift. - Generates `workspace-installer.spdx.json` and `workspace-installer.slsa.json`. +- Generates `release-manifest.json`. - Creates the GitHub release if missing and binds the tag to the exact workflow commit SHA. -- Uploads installer + SHA + provenance + reproducibility report assets to the release. +- Uploads installer + SHA + provenance + reproducibility + `release-manifest.json` assets to the release. - Writes release notes including SHA256 and the install command: ```powershell @@ -234,6 +239,63 @@ Verify downloaded asset integrity by matching the local hash against the SHA256 Tag immutability policy: existing release tags fail by default to prevent mutable release history. Fallback entrypoint: `.github/workflows/release-workspace-installer.yml` (wrapper to `_release-workspace-installer-core.yml`). +## Install from Upstream Release (Release Client) + +Use the release client runtime for one-command install/upgrade/rollback from release assets: + +```powershell +pwsh -NoProfile -File .\scripts\Install-WorkspaceInstallerFromRelease.ps1 ` + -Mode Install ` + -Channel stable +``` + +Install a specific release tag: + +```powershell +pwsh -NoProfile -File .\scripts\Install-WorkspaceInstallerFromRelease.ps1 ` + -Mode Install ` + -Tag v0.1.1 +``` + +Upgrade from the current state file to latest stable: + +```powershell +pwsh -NoProfile -File .\scripts\Install-WorkspaceInstallerFromRelease.ps1 ` + -Mode Upgrade ` + -Channel stable +``` + +Rollback to previous release state: + +```powershell +pwsh -NoProfile -File .\scripts\Install-WorkspaceInstallerFromRelease.ps1 ` + -Mode Rollback ` + -RollbackTo previous +``` + +Validate local release policy file: + +```powershell +pwsh -NoProfile -File .\scripts\Install-WorkspaceInstallerFromRelease.ps1 ` + -Mode ValidatePolicy +``` + +Release client contract paths: +- Policy: `C:\dev\workspace-governance\release-policy.json` +- State: `C:\dev\artifacts\workspace-release-state.json` +- 
Latest report: `C:\dev\artifacts\workspace-release-client-latest.json`
+
+Default allowed installer release repositories:
+- `LabVIEW-Community-CI-CD/labview-cdev-surface`
+- `svelderrainruiz/labview-cdev-surface`
+
+Fork/upstream cdev-cli synchronization policy starts with full sync metadata:
+- Primary CLI repo: `svelderrainruiz/labview-cdev-cli`
+- Mirror repo: `LabVIEW-Community-CI-CD/labview-cdev-cli`
+- Strategy: `fork-and-upstream-full-sync`
+
+Release channel metadata can be set during publish with workflow input `release_channel` (`stable`, `prerelease`, `canary`).
+
 ## Nightly canary
 
 `nightly-supplychain-canary.yml` runs on a nightly schedule and on demand. It executes:
diff --git a/scripts/Install-WorkspaceInstallerFromRelease.ps1 b/scripts/Install-WorkspaceInstallerFromRelease.ps1
new file mode 100644
index 0000000..bf8e1a3
--- /dev/null
+++ b/scripts/Install-WorkspaceInstallerFromRelease.ps1
@@ -0,0 +1,728 @@
+#Requires -Version 7.0
+[CmdletBinding()]  # PowerShell 7+ only: the JSON helpers below call ConvertFrom-Json with -Depth / -AsHashtable, which Windows PowerShell 5.1 does not have.
+param(
+    [Parameter()]
+    [string]$WorkspaceRoot = 'C:\dev',  # Root used to find workspace-governance.json (when -ManifestPath is empty) and artifacts\workspace-install-latest.json.
+
+    [Parameter()]
+    [string]$ManifestPath = '',  # Explicit workspace manifest path; empty falls back to <WorkspaceRoot>\workspace-governance.json.
+
+    [Parameter()]
+    [ValidateSet('Install', 'Upgrade', 'Rollback', 'Status', 'ValidatePolicy')]
+    [string]$Mode = 'Install',  # Operation to run; Status and ValidatePolicy only write a report and exit.
+
+    [Parameter()]
+    [ValidateSet('stable', 'prerelease', 'canary')]
+    [string]$Channel = 'stable',  # Channel used to pick the latest release when no tag is given.
+
+    [Parameter()]
+    [string]$Tag = '',  # Explicit release tag; empty selects the latest release for -Channel.
+
+    [Parameter()]
+    [string]$Repository = '',  # Release source repository; empty uses the first policy allowed_repositories entry.
+
+    [Parameter()]
+    [string]$PolicyPath = '',  # Policy file override; empty uses release_client.policy_path from the manifest.
+
+    [Parameter()]
+    [string]$OutputPath = '',  # Report path override; empty uses latest_report_path from the policy.
+
+    [Parameter()]
+    [switch]$AllowMajor,  # NOTE(review): presumably permits major-version upgrades; its use is outside the visible part of the script - confirm.
+
+    [Parameter()]
+    [switch]$AllowPrerelease,  # Opt-in for prerelease targets in Upgrade mode when the policy requires it.
+
+    [Parameter()]
+    [string]$RollbackTo = 'previous'  # 'previous' (first state history entry) or an explicit release tag.
+)
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = 'Stop'
+
+function Ensure-Directory {  # Creates the directory at $Path when it does not already exist.
+    param([Parameter(Mandatory = $true)][string]$Path)
+
+    if (-not (Test-Path -LiteralPath $Path -PathType Container)) {
+        New-Item -Path $Path -ItemType Directory -Force | Out-Null
+    }
+}
+
+function Read-JsonFile {
+    param([Parameter(Mandatory = $true)][string]$Path)
+
+    return (Get-Content -LiteralPath $Path -Raw | ConvertFrom-Json -Depth 100 -ErrorAction Stop)
+}
+
+function Write-JsonFile {  # Writes $Object as JSON (depth 100, plus a trailing newline) to $Path, creating the parent directory first.
+    param(
+        [Parameter(Mandatory = $true)][string]$Path,
+        [Parameter(Mandatory = $true)]$Object
+    )
+
+    $parent = Split-Path -Path $Path -Parent
+    if (-not [string]::IsNullOrWhiteSpace($parent)) {
+        Ensure-Directory -Path $parent
+    }
+    ($Object | ConvertTo-Json -Depth 100) + "`n" | Set-Content -LiteralPath $Path -Encoding utf8
+}
+
+function Throw-ReleaseClientError {  # Throws "[<ReasonCode>] <Message>"; Get-ReasonCodeFromException parses that prefix back out.
+    param(
+        [Parameter(Mandatory = $true)][string]$ReasonCode,
+        [Parameter(Mandatory = $true)][string]$Message
+    )
+
+    throw "[$ReasonCode] $Message"
+}
+
+function Resolve-Sha256Hex {  # Returns the lowercase hex SHA256 of the file at $Path.
+    param([Parameter(Mandatory = $true)][string]$Path)
+
+    $resolved = (Resolve-Path -LiteralPath $Path).Path
+    return (Get-FileHash -LiteralPath $resolved -Algorithm SHA256).Hash.ToLowerInvariant()
+}
+
+function Get-ReasonCodeFromException {  # Returns the reason code from a "[code] ..." message, or 'source_blocked' when there is no such prefix.
+    param([Parameter(Mandatory = $true)][string]$Message)
+
+    if ($Message -match '^\[(?<reason>[a-z0-9_\-]+)\]') {  # The group must be named 'reason'; the next line reads $Matches['reason'].
+        return $Matches['reason']
+    }
+    return 'source_blocked'
+}
+
+function Get-SemVer {  # Parses "v1.2.3" / "1.2.3" (any suffix is ignored) into major/minor/patch integers; returns $null when the tag does not match.
+    param([string]$TagName)
+
+    if ([string]::IsNullOrWhiteSpace($TagName)) {
+        return $null
+    }
+
+    $match = [regex]::Match($TagName, '^v?(?<major>\d+)\.(?<minor>\d+)\.(?<patch>\d+)')  # Group names must match the Groups[...] lookups below.
+    if (-not $match.Success) {
+        return $null
+    }
+
+    return [pscustomobject]@{
+        major = [int]$match.Groups['major'].Value
+        minor = [int]$match.Groups['minor'].Value
+        patch = [int]$match.Groups['patch'].Value
+    }
+}
+
+function Compare-SemVer {  # Returns 1, -1 or 0 as $Left is greater than, less than or equal to $Right, comparing major, then minor, then patch.
+    param(
+        [Parameter(Mandatory = $true)]$Left,
+        [Parameter(Mandatory = $true)]$Right
+    )
+
+    foreach ($name in @('major', 'minor', 'patch')) {
+        $l = [int]$Left.$name
+        $r = [int]$Right.$name
+        if ($l -gt $r) { return 1 }
+        if ($l -lt $r) { return -1 }
+    }
+
+    return 0
+}
+
+function Test-ContainsValue {
+    param(
+        [Parameter(Mandatory = $true)]$Collection,
+        [Parameter(Mandatory = $true)][string]$Value
+    )
+
+    foreach ($item in @($Collection)) {
+        if ([string]$item
-eq $Value) { + return $true + } + } + return $false +} + +function Select-LatestReleaseTag { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter(Mandatory = $true)][string]$Channel, + [Parameter(Mandatory = $true)][string]$CanaryRegex + ) + + $listJson = & gh release list -R $Repository --limit 100 --exclude-drafts --json tagName,isPrerelease,publishedAt 2>&1 + if ($LASTEXITCODE -ne 0) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Failed to list releases for '$Repository'. $([string]::Join("`n", @($listJson)))" + } + + $allReleases = $listJson | ConvertFrom-Json -ErrorAction Stop + $filtered = @() + foreach ($release in @($allReleases)) { + $tagName = [string]$release.tagName + $isPrerelease = [bool]$release.isPrerelease + + if ($Channel -eq 'stable' -and -not $isPrerelease) { + $filtered += $release + continue + } + + if ($Channel -eq 'prerelease' -and $isPrerelease -and ($tagName -notmatch $CanaryRegex)) { + $filtered += $release + continue + } + + if ($Channel -eq 'canary' -and $isPrerelease -and ($tagName -match $CanaryRegex)) { + $filtered += $release + continue + } + } + + if (@($filtered).Count -eq 0) { + Throw-ReleaseClientError -ReasonCode 'asset_missing' -Message "No '$Channel' release was found in '$Repository'." 
+    }
+    # Newest release first. The [DateTime] cast keeps an already-parsed DateTime as-is and parses strings culture-invariantly.
+    $selected = $filtered |
+        Sort-Object -Property @{Expression = { ([DateTime]$_.publishedAt).ToUniversalTime() }; Descending = $true } |
+        Select-Object -First 1
+
+    return [string]$selected.tagName
+}
+
+function Download-ReleaseAsset {  # Downloads one named asset with `gh release download` and returns its local path; failures throw reason code asset_missing.
+    param(
+        [Parameter(Mandatory = $true)][string]$Repository,
+        [Parameter(Mandatory = $true)][string]$ReleaseTag,
+        [Parameter(Mandatory = $true)][string]$AssetName,
+        [Parameter(Mandatory = $true)][string]$DestinationDirectory
+    )
+
+    $downloadOutput = & gh release download $ReleaseTag -R $Repository -p $AssetName -D $DestinationDirectory --clobber 2>&1
+    if ($LASTEXITCODE -ne 0) {
+        Throw-ReleaseClientError -ReasonCode 'asset_missing' -Message "Failed to download release asset '$AssetName' from '$Repository@$ReleaseTag'. $([string]::Join("`n", @($downloadOutput)))"
+    }
+
+    $assetPath = Join-Path $DestinationDirectory $AssetName
+    if (-not (Test-Path -LiteralPath $assetPath -PathType Leaf)) {  # gh reported success, but still confirm the file is on disk.
+        Throw-ReleaseClientError -ReasonCode 'asset_missing' -Message "Release asset was not found after download: $assetPath"
+    }
+
+    return $assetPath
+}
+
+function Get-SignatureEnforcement {  # Works out, for $Channel at the current time, whether a signature is enforced or an unsigned installer only warrants a warning.
+    param(
+        [Parameter(Mandatory = $true)]$SignaturePolicy,
+        [Parameter(Mandatory = $true)][string]$Channel
+    )
+    # ConvertFrom-Json may already have turned the policy dates into DateTime values. Stringifying and re-parsing them loses their Kind and depends on the current culture, so cast instead.
+    $now = (Get-Date).ToUniversalTime()
+    $dualStart = ([DateTime]$SignaturePolicy.dual_mode_start_utc).ToUniversalTime()
+    $canaryEnforce = ([DateTime]$SignaturePolicy.canary_enforce_utc).ToUniversalTime()
+    $graceEnd = ([DateTime]$SignaturePolicy.grace_end_utc).ToUniversalTime()
+
+    $enforceAt = if ($Channel -eq 'canary') { $canaryEnforce } else { $graceEnd }  # canary uses canary_enforce_utc; stable and prerelease use grace_end_utc.
+
+    return [pscustomobject]@{
+        now_utc = $now.ToString('o')
+        dual_mode_start_utc = $dualStart.ToString('o')
+        enforce_at_utc = $enforceAt.ToString('o')
+        enforce_signature = ($now -ge $enforceAt)
+        warn_if_unsigned = ($now -ge $dualStart -and $now -lt $enforceAt)
+    }
+}
+
+function Initialize-ReleaseState {
+
param([Parameter(Mandatory = $true)][string]$StatePath) + + if (Test-Path -LiteralPath $StatePath -PathType Leaf) { + return Read-JsonFile -Path $StatePath + } + + return [pscustomobject]@{ + current = $null + history = @() + updated_at_utc = '' + } +} + +function Save-ReleaseState { + param( + [Parameter(Mandatory = $true)][string]$StatePath, + [Parameter(Mandatory = $true)]$State + ) + + $State.updated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + Write-JsonFile -Path $StatePath -Object $State +} + +function Convert-PolicyToHashtable { + param([Parameter(Mandatory = $true)]$PolicyObject) + + $json = $PolicyObject | ConvertTo-Json -Depth 100 + return ($json | ConvertFrom-Json -AsHashtable -Depth 100) +} + +function Merge-PolicyNode { + param( + [Parameter(Mandatory = $true)]$BaseNode, + [Parameter(Mandatory = $true)]$OverrideNode + ) + + if ($BaseNode -isnot [System.Collections.IDictionary] -or $OverrideNode -isnot [System.Collections.IDictionary]) { + return $OverrideNode + } + + $merged = @{} + foreach ($key in $BaseNode.Keys) { + $merged[$key] = $BaseNode[$key] + } + foreach ($key in $OverrideNode.Keys) { + if ($merged.Contains($key)) { + $merged[$key] = Merge-PolicyNode -BaseNode $merged[$key] -OverrideNode $OverrideNode[$key] + } else { + $merged[$key] = $OverrideNode[$key] + } + } + + return $merged +} + +function Load-EffectivePolicy { + param( + [Parameter(Mandatory = $true)]$ManifestReleaseClient, + [Parameter(Mandatory = $true)][string]$PolicyPath + ) + + $basePolicy = Convert-PolicyToHashtable -PolicyObject $ManifestReleaseClient + + if (-not (Test-Path -LiteralPath $PolicyPath -PathType Leaf)) { + Write-JsonFile -Path $PolicyPath -Object $ManifestReleaseClient + return (Read-JsonFile -Path $PolicyPath) + } + + $overridePolicy = Read-JsonFile -Path $PolicyPath + $overrideHash = Convert-PolicyToHashtable -PolicyObject $overridePolicy + $mergedHash = Merge-PolicyNode -BaseNode $basePolicy -OverrideNode $overrideHash + + $mergedJson = $mergedHash | 
ConvertTo-Json -Depth 100
+    return ($mergedJson | ConvertFrom-Json -Depth 100)
+}
+
+function Assert-ReleaseClientPolicy {  # Validates the effective policy: schema_version, allowed_repositories, the three channels, the signature provider and the three signature dates.
+    param([Parameter(Mandatory = $true)]$Policy)
+
+    if ([string]::IsNullOrWhiteSpace([string]$Policy.schema_version)) {
+        Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message 'Release policy is missing schema_version.'
+    }
+
+    $allowedRepos = @($Policy.allowed_repositories)
+    if ($allowedRepos.Count -lt 1) {
+        Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message 'Release policy must define allowed_repositories.'
+    }
+
+    foreach ($requiredChannel in @('stable', 'prerelease', 'canary')) {
+        if (-not (Test-ContainsValue -Collection @($Policy.channel_rules.allowed_channels) -Value $requiredChannel)) {
+            Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Release policy is missing allowed channel '$requiredChannel'."
+        }
+    }
+
+    if ([string]$Policy.signature_policy.provider -ne 'authenticode') {
+        Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Unsupported signature provider '$($Policy.signature_policy.provider)'."
+    }
+    # Each cast throws when the value is missing or not a date. The cast accepts both JSON-converted DateTime values and strings, and parses strings culture-invariantly.
+    [void]([DateTime]$Policy.signature_policy.dual_mode_start_utc)
+    [void]([DateTime]$Policy.signature_policy.canary_enforce_utc)
+    [void]([DateTime]$Policy.signature_policy.grace_end_utc)
+}
+
+$report = [ordered]@{
+    timestamp_utc = (Get-Date).ToUniversalTime().ToString('o')
+    mode = $Mode
+    status = 'fail'
+    reason_code = ''
+    message = ''
+    repository = ''
+    release_tag = ''
+    requested_channel = $Channel
+    selected_channel = ''
+    policy_path = ''
+    state_path = ''
+    install_report_path = ''
+    warnings = @()
+    details = [ordered]@{}
+}
+
+$exitCode = 1
+
+try {
+    foreach ($commandName in @('gh', 'git', 'pwsh')) {
+        if (-not (Get-Command $commandName -ErrorAction SilentlyContinue)) {
+            Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Required command '$commandName' was not found on PATH."
+ } + } + + $resolvedManifestPath = if ([string]::IsNullOrWhiteSpace($ManifestPath)) { + Join-Path $WorkspaceRoot 'workspace-governance.json' + } else { + [System.IO.Path]::GetFullPath($ManifestPath) + } + + if (-not (Test-Path -LiteralPath $resolvedManifestPath -PathType Leaf)) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Workspace manifest was not found: $resolvedManifestPath" + } + + $manifest = Read-JsonFile -Path $resolvedManifestPath + if ($null -eq $manifest.installer_contract -or $null -eq $manifest.installer_contract.release_client) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Manifest is missing installer_contract.release_client: $resolvedManifestPath" + } + + $manifestPolicy = $manifest.installer_contract.release_client + $resolvedPolicyPath = if ([string]::IsNullOrWhiteSpace($PolicyPath)) { + [string]$manifestPolicy.policy_path + } else { + [System.IO.Path]::GetFullPath($PolicyPath) + } + if ([string]::IsNullOrWhiteSpace($resolvedPolicyPath)) { + $resolvedPolicyPath = 'C:\dev\workspace-governance\release-policy.json' + } + + $policy = Load-EffectivePolicy -ManifestReleaseClient $manifestPolicy -PolicyPath $resolvedPolicyPath + Assert-ReleaseClientPolicy -Policy $policy + + $statePath = [string]$policy.state_path + if ([string]::IsNullOrWhiteSpace($statePath)) { + $statePath = 'C:\dev\artifacts\workspace-release-state.json' + } + + $resolvedOutputPath = if ([string]::IsNullOrWhiteSpace($OutputPath)) { + [string]$policy.latest_report_path + } else { + [System.IO.Path]::GetFullPath($OutputPath) + } + if ([string]::IsNullOrWhiteSpace($resolvedOutputPath)) { + $resolvedOutputPath = 'C:\dev\artifacts\workspace-release-client-latest.json' + } + + $installReportPath = Join-Path $WorkspaceRoot 'artifacts\workspace-install-latest.json' + + $report.policy_path = $resolvedPolicyPath + $report.state_path = $statePath + $report.install_report_path = $installReportPath + + if ($Mode -eq 'ValidatePolicy') { + $report.status 
= 'pass' + $report.reason_code = 'ok' + $report.message = 'Release policy validation passed.' + $report.details.policy = $policy + Write-JsonFile -Path $resolvedOutputPath -Object $report + Write-Output ($report | ConvertTo-Json -Depth 30) + exit 0 + } + + $state = Initialize-ReleaseState -StatePath $statePath + + if ($Mode -eq 'Status') { + $report.status = 'pass' + $report.reason_code = 'ok' + $report.message = 'Release client status resolved from state file.' + $report.details.state = $state + Write-JsonFile -Path $resolvedOutputPath -Object $report + Write-Output ($report | ConvertTo-Json -Depth 30) + exit 0 + } + + $allowedRepositories = @($policy.allowed_repositories) + $selectedRepository = if ([string]::IsNullOrWhiteSpace($Repository)) { + [string]$allowedRepositories[0] + } else { + [string]$Repository + } + + if (-not (Test-ContainsValue -Collection $allowedRepositories -Value $selectedRepository)) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Repository '$selectedRepository' is not in release_client.allowed_repositories." + } + + $targetTag = [string]$Tag + + if ($Mode -eq 'Rollback') { + if ($RollbackTo -eq 'previous') { + $history = @($state.history) + if ($history.Count -lt 1) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message 'Rollback requested but no previous release state exists.' 
+ } + $targetTag = [string]$history[0].release_tag + if ([string]::IsNullOrWhiteSpace($Repository) -and -not [string]::IsNullOrWhiteSpace([string]$history[0].repository)) { + $selectedRepository = [string]$history[0].repository + } + } else { + $targetTag = $RollbackTo + } + } + + if ([string]::IsNullOrWhiteSpace($targetTag)) { + $targetTag = Select-LatestReleaseTag ` + -Repository $selectedRepository ` + -Channel $Channel ` + -CanaryRegex ([string]$policy.channel_rules.canary_tag_regex) + } + + $releaseViewJson = & gh release view $targetTag -R $selectedRepository --json tagName,isPrerelease,publishedAt,url 2>&1 + if ($LASTEXITCODE -ne 0) { + Throw-ReleaseClientError -ReasonCode 'asset_missing' -Message "Failed to resolve release '$targetTag' in '$selectedRepository'. $([string]::Join("`n", @($releaseViewJson)))" + } + + $releaseInfo = $releaseViewJson | ConvertFrom-Json -ErrorAction Stop + $releaseTag = [string]$releaseInfo.tagName + $releaseIsPrerelease = [bool]$releaseInfo.isPrerelease + $releaseUrl = [string]$releaseInfo.url + + $selectedChannel = if ($releaseIsPrerelease) { + if ($releaseTag -match [string]$policy.channel_rules.canary_tag_regex) { 'canary' } else { 'prerelease' } + } else { + 'stable' + } + + if ($Mode -ne 'Rollback') { + if ($selectedChannel -eq 'prerelease' -and [string]$Channel -eq 'stable') { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Stable channel does not allow prerelease target '$releaseTag'." + } + if ($selectedChannel -eq 'canary' -and [string]$Channel -ne 'canary') { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Canary release '$releaseTag' requires channel canary." + } + } + + if ($selectedChannel -eq 'prerelease' -and -not $AllowPrerelease -and [bool]$policy.channel_rules.prerelease_requires_opt_in -and $Mode -eq 'Upgrade') { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message 'Prerelease upgrade requires explicit opt-in.' 
+ } + + if ($selectedChannel -eq 'canary' -and -not $AllowPrerelease -and [bool]$policy.channel_rules.canary_requires_opt_in -and $Mode -eq 'Upgrade') { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message 'Canary upgrade requires explicit opt-in.' + } + + $report.repository = $selectedRepository + $report.release_tag = $releaseTag + $report.selected_channel = $selectedChannel + $report.details.release_url = $releaseUrl + + $current = $state.current + if ($Mode -eq 'Upgrade' -and $null -ne $current) { + $currentTag = [string]$current.release_tag + if ($currentTag -eq $releaseTag) { + $report.status = 'pass' + $report.reason_code = 'ok' + $report.message = "Already on release '$releaseTag'." + $report.details.state = $state + Write-JsonFile -Path $resolvedOutputPath -Object $report + Write-Output ($report | ConvertTo-Json -Depth 30) + exit 0 + } + + $currentSemVer = Get-SemVer -TagName $currentTag + $targetSemVer = Get-SemVer -TagName $releaseTag + if ($null -ne $currentSemVer -and $null -ne $targetSemVer) { + $comparison = Compare-SemVer -Left $targetSemVer -Right $currentSemVer + if ($comparison -lt 0 -and -not [bool]$policy.upgrade_policy.allow_downgrade) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Downgrade from '$currentTag' to '$releaseTag' is blocked by upgrade policy." + } + + $majorUpgradeRequested = ($targetSemVer.major -gt $currentSemVer.major) + if ($majorUpgradeRequested -and -not $AllowMajor -and -not [bool]$policy.upgrade_policy.allow_major_upgrade) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Major upgrade from '$currentTag' to '$releaseTag' requires -AllowMajor." 
+ } + } + } + + $downloadRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("lvie-release-client-{0}" -f ([guid]::NewGuid().ToString('N'))) + Ensure-Directory -Path $downloadRoot + + $releaseManifestPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName 'release-manifest.json' -DestinationDirectory $downloadRoot + $releaseManifest = Read-JsonFile -Path $releaseManifestPath + + if ([string]$releaseManifest.repository -ne $selectedRepository) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Release manifest repository mismatch. expected=$selectedRepository actual=$($releaseManifest.repository)" + } + if ([string]$releaseManifest.release_tag -ne $releaseTag) { + Throw-ReleaseClientError -ReasonCode 'source_blocked' -Message "Release manifest tag mismatch. expected=$releaseTag actual=$($releaseManifest.release_tag)" + } + + $installerAssetName = [string]$releaseManifest.installer.name + $shaAssetName = [string]$releaseManifest.installer.sha256_file + if ([string]::IsNullOrWhiteSpace($installerAssetName)) { + Throw-ReleaseClientError -ReasonCode 'asset_missing' -Message 'Release manifest is missing installer.name.' 
+ } + if ([string]::IsNullOrWhiteSpace($shaAssetName)) { + $shaAssetName = "$installerAssetName.sha256" + } + + $installerPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName $installerAssetName -DestinationDirectory $downloadRoot + $shaPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName $shaAssetName -DestinationDirectory $downloadRoot + + $spdxAsset = @($releaseManifest.provenance.assets | Where-Object { [string]$_.name -like '*.spdx.json' } | Select-Object -First 1) + $slsaAsset = @($releaseManifest.provenance.assets | Where-Object { [string]$_.name -like '*.slsa.json' } | Select-Object -First 1) + $reproAsset = @($releaseManifest.provenance.assets | Where-Object { [string]$_.name -eq 'reproducibility-report.json' } | Select-Object -First 1) + + if ($spdxAsset.Count -ne 1 -or $slsaAsset.Count -ne 1 -or $reproAsset.Count -ne 1) { + Throw-ReleaseClientError -ReasonCode 'provenance_invalid' -Message 'Release manifest provenance assets are incomplete.' 
+ } + + $spdxPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName ([string]$spdxAsset[0].name) -DestinationDirectory $downloadRoot + $slsaPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName ([string]$slsaAsset[0].name) -DestinationDirectory $downloadRoot + $reproPath = Download-ReleaseAsset -Repository $selectedRepository -ReleaseTag $releaseTag -AssetName ([string]$reproAsset[0].name) -DestinationDirectory $downloadRoot + + $expectedInstallerSha = ([string]$releaseManifest.installer.sha256).ToLowerInvariant() + if ($expectedInstallerSha -notmatch '^[0-9a-f]{64}$') { + Throw-ReleaseClientError -ReasonCode 'provenance_invalid' -Message "Release manifest installer sha256 is invalid: '$expectedInstallerSha'" + } + + $actualInstallerSha = Resolve-Sha256Hex -Path $installerPath + if ($actualInstallerSha -ne $expectedInstallerSha) { + Throw-ReleaseClientError -ReasonCode 'hash_mismatch' -Message "Installer hash mismatch. expected=$expectedInstallerSha actual=$actualInstallerSha" + } + + $shaFromFile = ((Get-Content -LiteralPath $shaPath -Raw).Split(' ')[0].Trim()).ToLowerInvariant() + if ($shaFromFile -ne $expectedInstallerSha) { + Throw-ReleaseClientError -ReasonCode 'hash_mismatch' -Message "Installer SHA file mismatch. 
expected=$expectedInstallerSha actual=$shaFromFile" + } + + $signaturePolicy = $policy.signature_policy + $enforcement = Get-SignatureEnforcement -SignaturePolicy $signaturePolicy -Channel $selectedChannel + + $signature = $null + if (Get-Command 'Get-AuthenticodeSignature' -ErrorAction SilentlyContinue) { + $signature = Get-AuthenticodeSignature -FilePath $installerPath + } + + $signatureStatus = if ($null -ne $signature) { [string]$signature.Status } else { 'CommandUnavailable' } + $signatureSubject = '' + $signatureThumbprint = '' + $signatureTimestampUtc = '' + if ($null -ne $signature -and $null -ne $signature.SignerCertificate) { + $signatureSubject = [string]$signature.SignerCertificate.Subject + $signatureThumbprint = [string]$signature.SignerCertificate.Thumbprint + } + if ($null -ne $signature -and $null -ne $signature.TimeStamperCertificate) { + $signatureTimestampUtc = (Get-Date $signature.TimeStamperCertificate.NotBefore).ToUniversalTime().ToString('o') + } + + $signatureIsMissing = ($signatureStatus -eq 'NotSigned') -or ($signatureStatus -eq 'CommandUnavailable') + $signatureIsInvalid = ($signatureStatus -ne 'Valid' -and -not $signatureIsMissing) + + if ([bool]$enforcement.enforce_signature) { + if ($signatureIsMissing) { + Throw-ReleaseClientError -ReasonCode 'signature_missing' -Message "Signature is required by policy but missing for '$installerAssetName'." + } + if ($signatureIsInvalid) { + Throw-ReleaseClientError -ReasonCode 'signature_invalid' -Message "Signature status '$signatureStatus' is invalid for required policy." + } + if ([bool]$signaturePolicy.require_timestamp -and [string]::IsNullOrWhiteSpace($signatureTimestampUtc)) { + Throw-ReleaseClientError -ReasonCode 'signature_invalid' -Message 'Timestamped Authenticode signature is required by policy.' + } + } else { + if ($signatureIsMissing -and [bool]$enforcement.warn_if_unsigned) { + $report.warnings += "Unsigned installer is temporarily allowed until $($enforcement.enforce_at_utc)." 
+ } elseif ($signatureIsInvalid) { + $report.warnings += "Installer signature status '$signatureStatus' is not valid and remains in warning window." + } + } + + $spdxText = Get-Content -LiteralPath $spdxPath -Raw + $slsaText = Get-Content -LiteralPath $slsaPath -Raw + if ($spdxText -notmatch [regex]::Escape($expectedInstallerSha)) { + Throw-ReleaseClientError -ReasonCode 'provenance_invalid' -Message 'SPDX provenance does not contain installer hash.' + } + if ($slsaText -notmatch [regex]::Escape($expectedInstallerSha)) { + Throw-ReleaseClientError -ReasonCode 'provenance_invalid' -Message 'SLSA provenance does not contain installer hash.' + } + + $report.details.signature = [ordered]@{ + status = $signatureStatus + subject = $signatureSubject + thumbprint = $signatureThumbprint + timestamp_utc = $signatureTimestampUtc + enforcement = $enforcement + } + + $process = Start-Process -FilePath $installerPath -ArgumentList '/S' -Wait -PassThru + if ([int]$process.ExitCode -ne 0) { + Throw-ReleaseClientError -ReasonCode 'installer_exit_nonzero' -Message "Installer exited with code $([int]$process.ExitCode)." 
+ } + + if (-not (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { + Throw-ReleaseClientError -ReasonCode 'install_report_missing' -Message "Installer report was not found: $installReportPath" + } + + $newEntry = [ordered]@{ + repository = $selectedRepository + release_tag = $releaseTag + channel = $selectedChannel + installed_at_utc = (Get-Date).ToUniversalTime().ToString('o') + release_url = $releaseUrl + installer_sha256 = $expectedInstallerSha + signature_status = $signatureStatus + install_report_path = $installReportPath + } + + if ($null -ne $state.current) { + $history = @($state.history) + $history = ,$state.current + $history + if ($history.Count -gt 20) { + $history = $history[0..19] + } + $state.history = $history + } + + $state.current = $newEntry + Save-ReleaseState -StatePath $statePath -State $state + + $report.status = 'pass' + $report.reason_code = 'ok' + $report.message = "Release '$releaseTag' installed successfully from '$selectedRepository'." + $report.details.installer_sha256 = $expectedInstallerSha + $report.details.release_manifest_path = $releaseManifestPath + $report.details.assets = [ordered]@{ + installer = $installerPath + sha256 = $shaPath + spdx = $spdxPath + slsa = $slsaPath + reproducibility = $reproPath + } + + Write-JsonFile -Path $resolvedOutputPath -Object $report + Write-Output ($report | ConvertTo-Json -Depth 30) + $exitCode = 0 +} catch { + $errorMessage = [string]$_.Exception.Message + $reasonCode = Get-ReasonCodeFromException -Message $errorMessage + + $report.status = 'fail' + $report.reason_code = $reasonCode + $report.message = $errorMessage + + if ([string]::IsNullOrWhiteSpace($report.repository)) { + $report.repository = if ([string]::IsNullOrWhiteSpace($Repository)) { '' } else { $Repository } + } + if ([string]::IsNullOrWhiteSpace($report.release_tag)) { + $report.release_tag = if ([string]::IsNullOrWhiteSpace($Tag)) { '' } else { $Tag } + } + + if ([string]::IsNullOrWhiteSpace($report.policy_path)) { + 
$report.policy_path = if ([string]::IsNullOrWhiteSpace($PolicyPath)) { '' } else { $PolicyPath } + } + if ([string]::IsNullOrWhiteSpace($report.install_report_path)) { + $report.install_report_path = Join-Path $WorkspaceRoot 'artifacts\workspace-install-latest.json' + } + + $finalOutputPath = if ([string]::IsNullOrWhiteSpace($OutputPath)) { + if ([string]::IsNullOrWhiteSpace($report.policy_path)) { + 'C:\dev\artifacts\workspace-release-client-latest.json' + } else { + Join-Path (Split-Path -Path $report.policy_path -Parent) '..\artifacts\workspace-release-client-latest.json' + } + } else { + [System.IO.Path]::GetFullPath($OutputPath) + } + + Write-JsonFile -Path $finalOutputPath -Object $report + Write-Output ($report | ConvertTo-Json -Depth 30) + $exitCode = 1 +} + +exit $exitCode diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 1d475c8..0068dab 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -98,6 +98,7 @@ Add-Check -Scope 'manifest' -Name 'has_installer_contract_provenance' -Passed ($ Add-Check -Scope 'manifest' -Name 'has_installer_contract_canary' -Passed ($installerContractMembers -contains 'canary') -Detail 'installer_contract.canary' Add-Check -Scope 'manifest' -Name 'has_installer_contract_cli_bundle' -Passed ($installerContractMembers -contains 'cli_bundle') -Detail 'installer_contract.cli_bundle' Add-Check -Scope 'manifest' -Name 'has_installer_contract_harness' -Passed ($installerContractMembers -contains 'harness') -Detail 'installer_contract.harness' +Add-Check -Scope 'manifest' -Name 'has_installer_contract_release_client' -Passed ($installerContractMembers -contains 'release_client') -Detail 'installer_contract.release_client' if ($installerContractMembers -contains 'reproducibility') { Add-Check -Scope 'manifest' -Name 'reproducibility_required_true' -Passed ([bool]$manifest.installer_contract.reproducibility.required) -Detail 
"required=$($manifest.installer_contract.reproducibility.required)" Add-Check -Scope 'manifest' -Name 'reproducibility_strict_hash_match_true' -Passed ([bool]$manifest.installer_contract.reproducibility.strict_hash_match) -Detail "strict_hash_match=$($manifest.installer_contract.reproducibility.strict_hash_match)" @@ -135,6 +136,31 @@ if ($installerContractMembers -contains 'harness') { Add-Check -Scope 'manifest' -Name "harness_required_postaction:$requiredPostaction" -Passed (@($harness.required_postactions) -contains $requiredPostaction) -Detail ([string]::Join(',', @($harness.required_postactions))) } } +if ($installerContractMembers -contains 'release_client') { + $releaseClient = $manifest.installer_contract.release_client + Add-Check -Scope 'manifest' -Name 'release_client_schema_version' -Passed ([string]$releaseClient.schema_version -eq '1.0') -Detail ([string]$releaseClient.schema_version) + Add-Check -Scope 'manifest' -Name 'release_client_default_install_root' -Passed ([string]$releaseClient.default_install_root -eq 'C:\dev') -Detail ([string]$releaseClient.default_install_root) + Add-Check -Scope 'manifest' -Name 'release_client_policy_path' -Passed ([string]$releaseClient.policy_path -eq 'C:\dev\workspace-governance\release-policy.json') -Detail ([string]$releaseClient.policy_path) + Add-Check -Scope 'manifest' -Name 'release_client_state_path' -Passed ([string]$releaseClient.state_path -eq 'C:\dev\artifacts\workspace-release-state.json') -Detail ([string]$releaseClient.state_path) + Add-Check -Scope 'manifest' -Name 'release_client_latest_report_path' -Passed ([string]$releaseClient.latest_report_path -eq 'C:\dev\artifacts\workspace-release-client-latest.json') -Detail ([string]$releaseClient.latest_report_path) + Add-Check -Scope 'manifest' -Name 'release_client_provenance_required' -Passed ([bool]$releaseClient.provenance_required) -Detail ([string]$releaseClient.provenance_required) + Add-Check -Scope 'manifest' -Name 
'release_client_allowed_repo_upstream' -Passed (@($releaseClient.allowed_repositories) -contains 'LabVIEW-Community-CI-CD/labview-cdev-surface') -Detail ([string]::Join(',', @($releaseClient.allowed_repositories))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_repo_fork' -Passed (@($releaseClient.allowed_repositories) -contains 'svelderrainruiz/labview-cdev-surface') -Detail ([string]::Join(',', @($releaseClient.allowed_repositories))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_stable' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'stable') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_prerelease' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'prerelease') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_canary' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'canary') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_default_channel' -Passed ([string]$releaseClient.channel_rules.default_channel -eq 'stable') -Detail ([string]$releaseClient.channel_rules.default_channel) + Add-Check -Scope 'manifest' -Name 'release_client_signature_provider' -Passed ([string]$releaseClient.signature_policy.provider -eq 'authenticode') -Detail ([string]$releaseClient.signature_policy.provider) + Add-Check -Scope 'manifest' -Name 'release_client_signature_mode' -Passed ([string]$releaseClient.signature_policy.mode -eq 'dual-mode-transition') -Detail ([string]$releaseClient.signature_policy.mode) + Add-Check -Scope 'manifest' -Name 'release_client_signature_dual_mode_start' -Passed (([DateTime]$releaseClient.signature_policy.dual_mode_start_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq 
'2026-03-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.dual_mode_start_utc) + Add-Check -Scope 'manifest' -Name 'release_client_signature_canary_enforce' -Passed (([DateTime]$releaseClient.signature_policy.canary_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-05-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.canary_enforce_utc) + Add-Check -Scope 'manifest' -Name 'release_client_signature_grace_end' -Passed (([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.grace_end_utc) + Add-Check -Scope 'manifest' -Name 'release_client_upgrade_allow_major' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_major_upgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_major_upgrade) + Add-Check -Scope 'manifest' -Name 'release_client_upgrade_allow_downgrade' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_downgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_downgrade) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_primary' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_mirror' -Passed ([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy) +} $requiredSchemaFields = @( 'path', diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 new file mode 100644 index 0000000..8ffd212 --- /dev/null +++ 
b/scripts/Test-ReleaseClientContracts.ps1
@@ -0,0 +1,204 @@
+#Requires -Version 7.0
+<#
+.SYNOPSIS
+Validates the installer_contract.release_client section of workspace-governance.json.
+
+.DESCRIPTION
+Reads <WorkspaceRoot>\workspace-governance.json, evaluates a fixed list of contract
+checks against installer_contract.release_client, and writes a JSON report (checks,
+failures, warnings) to the success stream.
+
+.PARAMETER WorkspaceRoot
+Directory that contains workspace-governance.json. Defaults to C:\dev.
+
+.PARAMETER FailOnWarning
+When set, failed warning-severity checks also produce exit code 1.
+
+.OUTPUTS
+JSON report text. Exit code is 1 when any error-severity check fails (or any
+warning-severity check fails with -FailOnWarning), otherwise 0.
+#>
+[CmdletBinding()]
+param(
+    [Parameter()]
+    [string]$WorkspaceRoot = 'C:\dev',
+
+    [Parameter()]
+    [switch]$FailOnWarning
+)
+
+$ErrorActionPreference = 'Stop'
+
+$manifestPath = Join-Path $WorkspaceRoot 'workspace-governance.json'
+$policyPath = Join-Path $WorkspaceRoot 'workspace-governance\release-policy.json'
+
+if (-not (Test-Path -LiteralPath $manifestPath -PathType Leaf)) {
+    throw "Manifest not found: $manifestPath"
+}
+
+$manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json -Depth 100
+$checks = @()
+$failures = @()
+$warnings = @()
+
+# Records one check result; a check that did not pass is also filed under
+# failures or warnings according to its severity.
+function Add-Check {
+    param(
+        [string]$Name,
+        [bool]$Passed,
+        [string]$Detail,
+        [ValidateSet('error', 'warning')]
+        [string]$Severity = 'error'
+    )
+
+    $entry = [ordered]@{
+        name = $Name
+        passed = $Passed
+        detail = $Detail
+        severity = $Severity
+    }
+    $script:checks += [pscustomobject]$entry
+
+    if (-not $Passed) {
+        if ($Severity -eq 'warning') {
+            $script:warnings += "$Name :: $Detail"
+        } else {
+            $script:failures += "$Name :: $Detail"
+        }
+    }
+}
+
+# Normalizes a manifest timestamp to a UTC DateTime, or $null when the value is
+# missing or unparseable. ConvertFrom-Json may already have turned ISO 8601
+# strings into DateTime values; remaining strings are parsed with the invariant
+# culture so the result never depends on the host's regional settings. Values
+# without an explicit offset are read as UTC because callers pass *_utc fields.
+function ConvertTo-UtcDateTime {
+    param(
+        [object]$Value
+    )
+
+    if ($null -eq $Value) {
+        return $null
+    }
+    if ($Value -is [DateTimeOffset]) {
+        return $Value.UtcDateTime
+    }
+    if ($Value -is [DateTime]) {
+        if ($Value.Kind -eq [DateTimeKind]::Unspecified) {
+            return [DateTime]::SpecifyKind($Value, [DateTimeKind]::Utc)
+        }
+        return $Value.ToUniversalTime()
+    }
+
+    $culture = [System.Globalization.CultureInfo]::InvariantCulture
+    $styles = [System.Globalization.DateTimeStyles]::AssumeUniversal
+    $parsed = [DateTimeOffset]::MinValue
+    if ([DateTimeOffset]::TryParse([string]$Value, $culture, $styles, [ref]$parsed)) {
+        return $parsed.UtcDateTime
+    }
+    return $null
+}
+
+# Renders a manifest timestamp as yyyy-MM-ddTHH:mm:ssZ using the invariant culture;
+# returns an empty string when the value cannot be read as a timestamp.
+function Format-UtcStamp {
+    param(
+        [object]$Value
+    )
+
+    $utc = ConvertTo-UtcDateTime -Value $Value
+    if ($null -eq $utc) {
+        return ''
+    }
+    return $utc.ToString('yyyy-MM-ddTHH:mm:ssZ', [System.Globalization.CultureInfo]::InvariantCulture)
+}
+
+# True only when both timestamps are readable and $Earlier is not after $Later.
+function Test-UtcOrder {
+    param(
+        [object]$Earlier,
+        [object]$Later
+    )
+
+    $first = ConvertTo-UtcDateTime -Value $Earlier
+    $second = ConvertTo-UtcDateTime -Value $Later
+    return ($null -ne $first -and $null -ne $second -and $first -le $second)
+}
+
+$releaseClient = $null
+if ($null -ne $manifest.installer_contract) {
+    $releaseClient = $manifest.installer_contract.release_client
+}
+
+Add-Check -Name 'release_client_exists' -Passed ($null -ne $releaseClient) -Detail 'installer_contract.release_client'
+
+if ($null -ne $releaseClient) {
+    Add-Check -Name 'schema_version' -Passed ([string]$releaseClient.schema_version -eq '1.0') -Detail ([string]$releaseClient.schema_version)
+
+    foreach ($repo in @('LabVIEW-Community-CI-CD/labview-cdev-surface', 'svelderrainruiz/labview-cdev-surface')) {
+        Add-Check -Name "allowed_repository:$repo" -Passed ((@($releaseClient.allowed_repositories) -contains $repo)) -Detail ([string]::Join(',', @($releaseClient.allowed_repositories)))
+    }
+
+    foreach ($channel in @('stable', 'prerelease', 'canary')) {
+        Add-Check -Name "allowed_channel:$channel" -Passed ((@($releaseClient.channel_rules.allowed_channels) -contains $channel)) -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels)))
+    }
+
+    Add-Check -Name 'default_channel_stable' -Passed ([string]$releaseClient.channel_rules.default_channel -eq 'stable') -Detail ([string]$releaseClient.channel_rules.default_channel)
+    Add-Check -Name 'signature_provider_authenticode' -Passed ([string]$releaseClient.signature_policy.provider -eq 'authenticode') -Detail ([string]$releaseClient.signature_policy.provider)
+    Add-Check -Name 'signature_mode_dual_mode' -Passed ([string]$releaseClient.signature_policy.mode -eq 'dual-mode-transition') -Detail ([string]$releaseClient.signature_policy.mode)
+    Add-Check -Name 'signature_grace_end' -Passed ((Format-UtcStamp -Value $releaseClient.signature_policy.grace_end_utc) -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.grace_end_utc)
+    Add-Check -Name 'signature_canary_enforce' -Passed ((Format-UtcStamp -Value $releaseClient.signature_policy.canary_enforce_utc) -eq '2026-05-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.canary_enforce_utc)
+    Add-Check -Name 'signature_dual_mode_start' -Passed ((Format-UtcStamp -Value $releaseClient.signature_policy.dual_mode_start_utc) -eq '2026-03-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.dual_mode_start_utc)
+    Add-Check -Name 'provenance_required_true' -Passed ([bool]$releaseClient.provenance_required) -Detail ([string]$releaseClient.provenance_required)
+    Add-Check -Name 'default_install_root' -Passed ([string]$releaseClient.default_install_root -eq 'C:\dev') -Detail ([string]$releaseClient.default_install_root)
+    Add-Check -Name 'upgrade_allow_major_false' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_major_upgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_major_upgrade)
+    Add-Check -Name 'upgrade_allow_downgrade_false' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_downgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_downgrade)
+    Add-Check -Name 'state_path' -Passed ([string]$releaseClient.state_path -eq 'C:\dev\artifacts\workspace-release-state.json') -Detail ([string]$releaseClient.state_path)
+    Add-Check -Name 'latest_report_path' -Passed ([string]$releaseClient.latest_report_path -eq 'C:\dev\artifacts\workspace-release-client-latest.json') -Detail ([string]$releaseClient.latest_report_path)
+    Add-Check -Name 'policy_path' -Passed ([string]$releaseClient.policy_path -eq 'C:\dev\workspace-governance\release-policy.json') -Detail ([string]$releaseClient.policy_path)
+
+    Add-Check -Name 'cdev_cli_sync_primary_repo' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo)
+    Add-Check -Name 'cdev_cli_sync_mirror_repo' -Passed ([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo)
+    Add-Check -Name 'cdev_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy)
+
+    # Enforcement dates must be chronological; unreadable dates fail the check.
+    $dualBeforeCanary = Test-UtcOrder -Earlier $releaseClient.signature_policy.dual_mode_start_utc -Later $releaseClient.signature_policy.canary_enforce_utc
+    $dualBeforeCanaryDetail = if ($dualBeforeCanary) { 'ok' } else { 'dual_mode_start_utc must be <= canary_enforce_utc' }
+    Add-Check -Name 'signature_date_order_dual_before_canary' -Passed $dualBeforeCanary -Detail $dualBeforeCanaryDetail
+
+    $canaryBeforeGraceEnd = Test-UtcOrder -Earlier $releaseClient.signature_policy.canary_enforce_utc -Later $releaseClient.signature_policy.grace_end_utc
+    $canaryBeforeGraceEndDetail = if ($canaryBeforeGraceEnd) { 'ok' } else { 'canary_enforce_utc must be <= grace_end_utc' }
+    Add-Check -Name 'signature_date_order_canary_before_grace_end' -Passed $canaryBeforeGraceEnd -Detail $canaryBeforeGraceEndDetail
+}
+
+if (-not (Test-Path -LiteralPath $policyPath -PathType Leaf)) {
+    Add-Check -Name 'policy_file_exists' -Passed $false -Detail $policyPath -Severity 'warning'
+} else {
+    Add-Check -Name 'policy_file_exists' -Passed $true -Detail $policyPath
+}
+
+$report = [ordered]@{
+    timestamp_utc = (Get-Date).ToUniversalTime().ToString('o')
+    workspace_root = $WorkspaceRoot
+    summary = [ordered]@{
+        checks = $checks.Count
+        failures = $failures.Count
+        warnings = $warnings.Count
+    }
+    checks = $checks
+    failures = $failures
+    warnings = $warnings
+}
+
+$report | ConvertTo-Json -Depth 20 | Write-Output
+
+if ($failures.Count -gt 0) {
+    exit 1
+}
+if ($FailOnWarning -and $warnings.Count -gt 0) {
+    exit 1
+}
+
+exit 0
diff --git a/scripts/Write-ReleaseManifest.ps1 b/scripts/Write-ReleaseManifest.ps1
new file mode 100644
index 0000000..995f7b3
--- /dev/null
+++ b/scripts/Write-ReleaseManifest.ps1
@@ -0,0 +1,193 @@
+#Requires -Version 5.1
+<#
+.SYNOPSIS
+Writes the release-manifest.json that describes a published workspace installer release.
+
+.DESCRIPTION
+Resolves the installer, its SHA256 sidecar file, and the SPDX, SLSA, and
+reproducibility assets, then serializes a manifest (installer metadata, signature
+fields, provenance asset hashes and download URLs, compatibility and rollback
+hints) to OutputPath as UTF-8 JSON.
+
+.PARAMETER InstallerSha256
+Expected 64-character hexadecimal SHA256 of the installer; stored lowercase.
+
+.PARAMETER PublishedAtUtc
+Optional publication timestamp. Parsed with the invariant culture and read as UTC
+when it carries no offset. Defaults to the current time.
+
+.PARAMETER OutputPath
+Destination file. A missing parent directory is created.
+#>
+[CmdletBinding()]
+param(
+    [Parameter(Mandatory = $true)]
+    [string]$Repository,
+
+    [Parameter(Mandatory = $true)]
+    [string]$ReleaseTag,
+
+    [Parameter()]
+    [ValidateSet('stable', 'prerelease', 'canary')]
+    [string]$Channel = 'stable',
+
+    [Parameter(Mandatory = $true)]
+    [string]$InstallerPath,
+
+    [Parameter(Mandatory = $true)]
+    [string]$InstallerSha256,
+
+    [Parameter(Mandatory = $true)]
+    [string]$InstallerShaPath,
+
+    [Parameter(Mandatory = $true)]
+    [string]$SpdxPath,
+
+    [Parameter(Mandatory = $true)]
+    [string]$SlsaPath,
+
+    [Parameter(Mandatory = $true)]
+    [string]$ReproducibilityPath,
+
+    [Parameter(Mandatory = $true)]
+    [string]$OutputPath,
+
+    [Parameter()]
+    [string]$InstallCommand = 'lvie-cdev-workspace-installer.exe /S',
+
+    [Parameter()]
+    [string]$PublishedAtUtc = '',
+
+    [Parameter()]
+    [string]$SignatureStatus = 'not_signed',
+
+    [Parameter()]
+    [string]$SignatureSubject = '',
+
+    [Parameter()]
+    [string]$SignatureThumbprint = '',
+
+    [Parameter()]
+    [string]$SignatureTimestampUtc = ''
+)
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = 'Stop'
+
+# Returns the lowercase hexadecimal SHA256 of the file at Path.
+function Get-Sha256Hex {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Path
+    )
+
+    $resolved = (Resolve-Path -LiteralPath $Path).Path
+    return (Get-FileHash -LiteralPath $resolved -Algorithm SHA256).Hash.ToLowerInvariant()
+}
+
+# Builds the manifest entry (name, sha256, release download URL) for one provenance file.
+function New-ProvenanceAsset {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$Path,
+        [Parameter(Mandatory = $true)]
+        [string]$Repository,
+        [Parameter(Mandatory = $true)]
+        [string]$ReleaseTag
+    )
+
+    $resolved = (Resolve-Path -LiteralPath $Path).Path
+    $name = [System.IO.Path]::GetFileName($resolved)
+    [ordered]@{
+        name = $name
+        sha256 = Get-Sha256Hex -Path $resolved
+        url = "https://github.com/$Repository/releases/download/$ReleaseTag/$name"
+    }
+}
+
+$resolvedInstallerPath = (Resolve-Path -LiteralPath $InstallerPath).Path
+$resolvedInstallerShaPath = (Resolve-Path -LiteralPath $InstallerShaPath).Path
+$resolvedSpdxPath = (Resolve-Path -LiteralPath $SpdxPath).Path
+$resolvedSlsaPath = (Resolve-Path -LiteralPath $SlsaPath).Path
+$resolvedReproPath = (Resolve-Path -LiteralPath $ReproducibilityPath).Path
+
+# Resolve-Path accepts directories, so confirm each asset is a file.
+foreach ($requiredPath in @($resolvedInstallerPath, $resolvedInstallerShaPath, $resolvedSpdxPath, $resolvedSlsaPath, $resolvedReproPath)) {
+    if (-not (Test-Path -LiteralPath $requiredPath -PathType Leaf)) {
+        throw "Required release asset was not found: $requiredPath"
+    }
+}
+
+$normalizedSha = ([string]$InstallerSha256).ToLowerInvariant()
+if ($normalizedSha -notmatch '^[0-9a-f]{64}$') {
+    throw "Installer SHA256 is invalid: '$InstallerSha256'"
+}
+
+$installerName = [System.IO.Path]::GetFileName($resolvedInstallerPath)
+$installerShaName = [System.IO.Path]::GetFileName($resolvedInstallerShaPath)
+
+# The manifest records the declared hash. Warn when it differs from the
+# installer on disk, since the resulting manifest would describe another binary.
+$actualInstallerSha = Get-Sha256Hex -Path $resolvedInstallerPath
+if ($actualInstallerSha -ne $normalizedSha) {
+    Write-Warning "Declared installer SHA256 '$normalizedSha' does not match '$installerName' on disk ('$actualInstallerSha')."
+}
+
+$publishedAt = if ([string]::IsNullOrWhiteSpace($PublishedAtUtc)) {
+    (Get-Date).ToUniversalTime().ToString('o')
+} else {
+    # Invariant culture keeps parsing independent of the host locale; a value
+    # without an offset is read as UTC, matching the parameter name.
+    $utcStyles = [System.Globalization.DateTimeStyles]'AssumeUniversal, AdjustToUniversal'
+    [DateTime]::Parse($PublishedAtUtc, [System.Globalization.CultureInfo]::InvariantCulture, $utcStyles).ToString('o')
+}
+
+$provenanceAssets = @(
+    (New-ProvenanceAsset -Path $resolvedSpdxPath -Repository $Repository -ReleaseTag $ReleaseTag),
+    (New-ProvenanceAsset -Path $resolvedSlsaPath -Repository $Repository -ReleaseTag $ReleaseTag),
+    (New-ProvenanceAsset -Path $resolvedReproPath -Repository $Repository -ReleaseTag $ReleaseTag)
+)
+
+$releaseManifest = [ordered]@{
+    schema_version = '1.0'
+    repository = $Repository
+    release_tag = $ReleaseTag
+    channel = $Channel
+    published_at_utc = $publishedAt
+    installer = [ordered]@{
+        name = $installerName
+        url = "https://github.com/$Repository/releases/download/$ReleaseTag/$installerName"
+        sha256 = $normalizedSha
+        sha256_file = $installerShaName
+        signature = [ordered]@{
+            provider = 'authenticode'
+            status = $SignatureStatus
+            subject = $SignatureSubject
+            thumbprint = $SignatureThumbprint
+            timestamp_utc = $SignatureTimestampUtc
+        }
+    }
+    provenance = [ordered]@{
+        required = $true
+        assets = $provenanceAssets
+    }
+    install_command = $InstallCommand
+    compatibility = [ordered]@{
+        windows_only = $true
+        minimum_powershell = '5.1'
+        release_client_mode = 'policy-driven'
+    }
+    rollback = [ordered]@{
+        strategy = 'state-file-previous-or-tag'
+        # Single-quoted strings do not process escapes; ConvertTo-Json escapes the backslashes.
+        state_path = 'C:\dev\artifacts\workspace-release-state.json'
+    }
+}
+
+$outputDirectory = Split-Path -Path $OutputPath -Parent
+if (-not [string]::IsNullOrWhiteSpace($outputDirectory) -and -not (Test-Path -LiteralPath $outputDirectory -PathType Container)) {
+    New-Item -Path $outputDirectory -ItemType Directory -Force | Out-Null
+}
+
+$releaseManifest | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $OutputPath -Encoding utf8
+Write-Host "Release manifest written: $OutputPath"
diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 new file mode 100644 index 0000000..a2415a5 --- /dev/null +++
b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -0,0 +1,67 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release client policy contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:manifestPath = Join-Path $script:repoRoot 'workspace-governance.json' + $script:payloadManifestPath = Join-Path $script:repoRoot 'workspace-governance-payload/workspace-governance/workspace-governance.json' + $script:policyScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseClientContracts.ps1' + + if (-not (Test-Path -LiteralPath $script:manifestPath -PathType Leaf)) { + throw "Manifest missing: $script:manifestPath" + } + if (-not (Test-Path -LiteralPath $script:payloadManifestPath -PathType Leaf)) { + throw "Payload manifest missing: $script:payloadManifestPath" + } + if (-not (Test-Path -LiteralPath $script:policyScriptPath -PathType Leaf)) { + throw "Release client policy script missing: $script:policyScriptPath" + } + + $script:manifest = Get-Content -LiteralPath $script:manifestPath -Raw | ConvertFrom-Json -Depth 100 + $script:payloadManifest = Get-Content -LiteralPath $script:payloadManifestPath -Raw | ConvertFrom-Json -Depth 100 + $script:policyScriptContent = Get-Content -LiteralPath $script:policyScriptPath -Raw + } + + It 'defines release_client policy defaults in manifest and payload manifest' { + $releaseClient = $script:manifest.installer_contract.release_client + $releaseClient | Should -Not -BeNullOrEmpty + $releaseClient.schema_version | Should -Be '1.0' + @($releaseClient.allowed_repositories) | Should -Contain 'LabVIEW-Community-CI-CD/labview-cdev-surface' + @($releaseClient.allowed_repositories) | Should -Contain 'svelderrainruiz/labview-cdev-surface' + @($releaseClient.channel_rules.allowed_channels) | Should -Contain 'stable' + @($releaseClient.channel_rules.allowed_channels) | Should -Contain 'prerelease' + 
@($releaseClient.channel_rules.allowed_channels) | Should -Contain 'canary' + $releaseClient.signature_policy.provider | Should -Be 'authenticode' + $releaseClient.signature_policy.mode | Should -Be 'dual-mode-transition' + ([DateTime]$releaseClient.signature_policy.dual_mode_start_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-03-15T00:00:00Z' + ([DateTime]$releaseClient.signature_policy.canary_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-05-15T00:00:00Z' + ([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' + $releaseClient.policy_path | Should -Be 'C:\dev\workspace-governance\release-policy.json' + $releaseClient.state_path | Should -Be 'C:\dev\artifacts\workspace-release-state.json' + $releaseClient.latest_report_path | Should -Be 'C:\dev\artifacts\workspace-release-client-latest.json' + $releaseClient.cdev_cli_sync.primary_repo | Should -Be 'svelderrainruiz/labview-cdev-cli' + $releaseClient.cdev_cli_sync.mirror_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' + $releaseClient.cdev_cli_sync.strategy | Should -Be 'fork-and-upstream-full-sync' + + ($script:payloadManifest | ConvertTo-Json -Depth 100) | Should -Be ($script:manifest | ConvertTo-Json -Depth 100) + } + + It 'includes release-client policy validation script content' { + $script:policyScriptContent | Should -Match 'release_client_exists' + $script:policyScriptContent | Should -Match 'allowed_repository:' + $script:policyScriptContent | Should -Match 'LabVIEW-Community-CI-CD/labview-cdev-surface' + $script:policyScriptContent | Should -Match 'svelderrainruiz/labview-cdev-surface' + $script:policyScriptContent | Should -Match 'cdev_cli_sync_primary_repo' + $script:policyScriptContent | Should -Match 'cdev_cli_sync_mirror_repo' + } + + It 'has parse-safe PowerShell syntax' { + $tokens = $null + $errors = $null + 
[void][System.Management.Automation.Language.Parser]::ParseInput($script:policyScriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/ReleaseClientRuntimeContract.Tests.ps1 b/tests/ReleaseClientRuntimeContract.Tests.ps1 new file mode 100644 index 0000000..92b7be7 --- /dev/null +++ b/tests/ReleaseClientRuntimeContract.Tests.ps1 @@ -0,0 +1,47 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release client runtime contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:scriptPath = Join-Path $script:repoRoot 'scripts/Install-WorkspaceInstallerFromRelease.ps1' + if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) { + throw "Release client runtime script missing: $script:scriptPath" + } + $script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw + } + + It 'defines additive install/upgrade/rollback/status/policy modes' { + $script:scriptContent | Should -Match "ValidateSet\('Install', 'Upgrade', 'Rollback', 'Status', 'ValidatePolicy'\)" + $script:scriptContent | Should -Match "ValidateSet\('stable', 'prerelease', 'canary'\)" + $script:scriptContent | Should -Match '\$AllowMajor' + $script:scriptContent | Should -Match '\$RollbackTo' + $script:scriptContent | Should -Match '\$PolicyPath' + } + + It 'enforces release source allowlist, signatures, provenance, and installer report checks' { + $script:scriptContent | Should -Match 'allowed_repositories' + $script:scriptContent | Should -Match 'release-manifest\.json' + $script:scriptContent | Should -Match 'Get-AuthenticodeSignature' + $script:scriptContent | Should -Match '\.spdx\.json' + $script:scriptContent | Should -Match '\.slsa\.json' + $script:scriptContent | Should -Match 'workspace-install-latest\.json' + $script:scriptContent | Should -Match 'workspace-release-state\.json' + $script:scriptContent | Should -Match 
'workspace-release-client-latest\.json' + } + + It 'defines deterministic failure reason codes' { + foreach ($reason in @('source_blocked', 'asset_missing', 'hash_mismatch', 'signature_missing', 'signature_invalid', 'provenance_invalid', 'installer_exit_nonzero', 'install_report_missing')) { + $script:scriptContent | Should -Match $reason + } + } + + It 'has parse-safe PowerShell syntax' { + $tokens = $null + $errors = $null + [void][System.Management.Automation.Language.Parser]::ParseInput($script:scriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/ReleaseManifestContract.Tests.ps1 b/tests/ReleaseManifestContract.Tests.ps1 new file mode 100644 index 0000000..215cb09 --- /dev/null +++ b/tests/ReleaseManifestContract.Tests.ps1 @@ -0,0 +1,51 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release manifest contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:scriptPath = Join-Path $script:repoRoot 'scripts/Write-ReleaseManifest.ps1' + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/_release-workspace-installer-core.yml' + + if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) { + throw "Release manifest script missing: $script:scriptPath" + } + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Release core workflow missing: $script:workflowPath" + } + + $script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + } + + It 'defines required release-manifest fields and signature metadata' { + $script:scriptContent | Should -Match 'schema_version' + $script:scriptContent | Should -Match 'repository' + $script:scriptContent | Should -Match 'release_tag' + $script:scriptContent | Should -Match 'channel' + $script:scriptContent | Should -Match 
'published_at_utc' + $script:scriptContent | Should -Match 'installer' + $script:scriptContent | Should -Match 'sha256' + $script:scriptContent | Should -Match 'signature' + $script:scriptContent | Should -Match 'provenance' + $script:scriptContent | Should -Match 'install_command' + $script:scriptContent | Should -Match 'compatibility' + $script:scriptContent | Should -Match 'rollback' + $script:scriptContent | Should -Match 'authenticode' + } + + It 'is generated and published by release workflow' { + $script:workflowContent | Should -Match 'Write-ReleaseManifest\.ps1' + $script:workflowContent | Should -Match 'release-manifest\.json' + $script:workflowContent | Should -Match 'gh release upload' + } + + It 'has parse-safe PowerShell syntax' { + $tokens = $null + $errors = $null + [void][System.Management.Automation.Language.Parser]::ParseInput($script:scriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 b/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 index 2261ef8..e783d7e 100644 --- a/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 +++ b/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 @@ -21,6 +21,7 @@ Describe 'Release with Windows gate workflow contract' { $script:workflowContent | Should -Match 'release_tag:' $script:workflowContent | Should -Match 'allow_existing_tag:' $script:workflowContent | Should -Match 'prerelease:' + $script:workflowContent | Should -Match 'release_channel:' $script:workflowContent | Should -Match 'allow_gate_override:' $script:workflowContent | Should -Match 'override_reason:' $script:workflowContent | Should -Match 'override_incident_url:' diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 912abde..755e391 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -27,6 +27,7 @@ 
Describe 'Workspace installer release workflow contract' { $script:wrapperWorkflowContent | Should -Match 'required:\s*true' $script:wrapperWorkflowContent | Should -Match 'type:\s*string' $script:wrapperWorkflowContent | Should -Match 'prerelease:' + $script:wrapperWorkflowContent | Should -Match 'release_channel:' $script:wrapperWorkflowContent | Should -Match 'type:\s*boolean' $script:wrapperWorkflowContent | Should -Match 'allow_existing_tag:' $script:wrapperWorkflowContent | Should -Match 'Allow updating an existing release tag' @@ -38,6 +39,7 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'release_tag:' $script:coreWorkflowContent | Should -Match 'allow_existing_tag:' $script:coreWorkflowContent | Should -Match 'prerelease:' + $script:coreWorkflowContent | Should -Match 'release_channel:' $script:coreWorkflowContent | Should -Match 'override_applied:' $script:coreWorkflowContent | Should -Match 'override_reason:' $script:coreWorkflowContent | Should -Match 'override_incident_url:' @@ -53,7 +55,13 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'Test-RunnerCliBundleDeterminism\.ps1' $script:coreWorkflowContent | Should -Match 'Test-WorkspaceInstallerDeterminism\.ps1' $script:coreWorkflowContent | Should -Match 'Write-ReleaseProvenance\.ps1' + $script:coreWorkflowContent | Should -Match 'Write-ReleaseManifest\.ps1' $script:coreWorkflowContent | Should -Match 'Test-ProvenanceContracts\.ps1' + $script:coreWorkflowContent | Should -Match 'Set-AuthenticodeSignature' + $script:coreWorkflowContent | Should -Match 'Get-AuthenticodeSignature' + $script:coreWorkflowContent | Should -Match 'WORKSPACE_INSTALLER_CODESIGN_PFX_B64' + $script:coreWorkflowContent | Should -Match 'WORKSPACE_INSTALLER_CODESIGN_PFX_PASSWORD' + $script:coreWorkflowContent | Should -Match 'release-manifest\.json' $script:coreWorkflowContent | Should -Match 'release and parity 
artifact roots are identical' $script:coreWorkflowContent | Should -Match 'must not point to parity path' $script:coreWorkflowContent | Should -Match 'Parity artifact path was selected for release publish input' @@ -75,6 +83,9 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'workspace-installer\.spdx\.json' $script:coreWorkflowContent | Should -Match 'workspace-installer\.slsa\.json' $script:coreWorkflowContent | Should -Match 'reproducibility-report\.json' + $script:coreWorkflowContent | Should -Match 'Release channel' + $script:coreWorkflowContent | Should -Match 'release_channel' + $script:coreWorkflowContent | Should -Match 'release-manifest\.json' $script:coreWorkflowContent | Should -Match 'Override Disclosure' $script:coreWorkflowContent | Should -Match 'OVERRIDE_APPLIED' } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 44ec2d7..4a4cf9b 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -26,7 +26,10 @@ Describe 'Workspace surface contract' { $script:runnerCliDeterminismScriptPath = Join-Path $script:repoRoot 'scripts/Test-RunnerCliBundleDeterminism.ps1' $script:installerDeterminismScriptPath = Join-Path $script:repoRoot 'scripts/Test-WorkspaceInstallerDeterminism.ps1' $script:writeProvenanceScriptPath = Join-Path $script:repoRoot 'scripts/Write-ReleaseProvenance.ps1' + $script:writeReleaseManifestScriptPath = Join-Path $script:repoRoot 'scripts/Write-ReleaseManifest.ps1' $script:testProvenanceScriptPath = Join-Path $script:repoRoot 'scripts/Test-ProvenanceContracts.ps1' + $script:installFromReleaseScriptPath = Join-Path $script:repoRoot 'scripts/Install-WorkspaceInstallerFromRelease.ps1' + $script:testReleaseClientContractsScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseClientContracts.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 
'scripts/Invoke-DockerDesktopLinuxIteration.ps1' $script:nsisInstallerPath = Join-Path $script:repoRoot 'nsis/workspace-bootstrap-installer.nsi' $script:ciWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ci.yml' @@ -75,7 +78,10 @@ Describe 'Workspace surface contract' { $script:runnerCliDeterminismScriptPath, $script:installerDeterminismScriptPath, $script:writeProvenanceScriptPath, + $script:writeReleaseManifestScriptPath, $script:testProvenanceScriptPath, + $script:installFromReleaseScriptPath, + $script:testReleaseClientContractsScriptPath, $script:dockerLinuxIterationScriptPath, $script:nsisInstallerPath, $script:ciWorkflowPath, @@ -206,6 +212,28 @@ Describe 'Workspace surface contract' { (@($script:manifest.installer_contract.harness.required_postactions) -contains 'ppl_capability_checks.32') | Should -BeTrue (@($script:manifest.installer_contract.harness.required_postactions) -contains 'ppl_capability_checks.64') | Should -BeTrue (@($script:manifest.installer_contract.harness.required_postactions) -contains 'vip_package_build_check') | Should -BeTrue + $script:manifest.installer_contract.release_client.schema_version | Should -Be '1.0' + (@($script:manifest.installer_contract.release_client.allowed_repositories) -contains 'LabVIEW-Community-CI-CD/labview-cdev-surface') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.allowed_repositories) -contains 'svelderrainruiz/labview-cdev-surface') | Should -BeTrue + $script:manifest.installer_contract.release_client.channel_rules.default_channel | Should -Be 'stable' + (@($script:manifest.installer_contract.release_client.channel_rules.allowed_channels) -contains 'stable') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.channel_rules.allowed_channels) -contains 'prerelease') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.channel_rules.allowed_channels) -contains 'canary') | Should -BeTrue + 
$script:manifest.installer_contract.release_client.signature_policy.provider | Should -Be 'authenticode' + $script:manifest.installer_contract.release_client.signature_policy.mode | Should -Be 'dual-mode-transition' + ([DateTime]$script:manifest.installer_contract.release_client.signature_policy.dual_mode_start_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-03-15T00:00:00Z' + ([DateTime]$script:manifest.installer_contract.release_client.signature_policy.canary_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-05-15T00:00:00Z' + ([DateTime]$script:manifest.installer_contract.release_client.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' + $script:manifest.installer_contract.release_client.provenance_required | Should -BeTrue + $script:manifest.installer_contract.release_client.default_install_root | Should -Be 'C:\dev' + $script:manifest.installer_contract.release_client.upgrade_policy.allow_downgrade | Should -BeFalse + $script:manifest.installer_contract.release_client.upgrade_policy.allow_major_upgrade | Should -BeFalse + $script:manifest.installer_contract.release_client.policy_path | Should -Be 'C:\dev\workspace-governance\release-policy.json' + $script:manifest.installer_contract.release_client.state_path | Should -Be 'C:\dev\artifacts\workspace-release-state.json' + $script:manifest.installer_contract.release_client.latest_report_path | Should -Be 'C:\dev\artifacts\workspace-release-client-latest.json' + $script:manifest.installer_contract.release_client.cdev_cli_sync.primary_repo | Should -Be 'svelderrainruiz/labview-cdev-cli' + $script:manifest.installer_contract.release_client.cdev_cli_sync.mirror_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' + $script:manifest.installer_contract.release_client.cdev_cli_sync.strategy | Should -Be 'fork-and-upstream-full-sync' foreach ($repo in @($script:manifest.managed_repos)) 
{ $repo.PSObject.Properties.Name | Should -Contain 'required_gh_repo' $repo.PSObject.Properties.Name | Should -Contain 'default_branch' @@ -280,6 +308,12 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'iteration-summary\.json' $script:agentsContent | Should -Match 'exercise-report\.json' $script:agentsContent | Should -Match 'workspace-install-latest\.json' + $script:agentsContent | Should -Match 'release-manifest\.json' + $script:agentsContent | Should -Match 'Install-WorkspaceInstallerFromRelease\.ps1' + $script:agentsContent | Should -Match 'workspace-release-state\.json' + $script:agentsContent | Should -Match 'workspace-release-client-latest\.json' + $script:agentsContent | Should -Match 'svelderrainruiz/labview-cdev-cli' + $script:agentsContent | Should -Match 'LabVIEW-Community-CI-CD/labview-cdev-cli' $script:readmeContent | Should -Match 'Workspace SHA Refresh PR' $script:readmeContent | Should -Match 'automation/sha-refresh' $script:readmeContent | Should -Match 'Invoke-CdevCli\.ps1' @@ -296,6 +330,10 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'integration/' $script:readmeContent | Should -Match 'self-hosted-windows-lv' $script:readmeContent | Should -Match 'installer-harness' + $script:readmeContent | Should -Match 'release-manifest\.json' + $script:readmeContent | Should -Match 'Install-WorkspaceInstallerFromRelease\.ps1' + $script:readmeContent | Should -Match 'workspace-release-state\.json' + $script:readmeContent | Should -Match 'workspace-release-client-latest\.json' } It 'documents Windows feature troubleshooting reporting contract for Docker gating' { @@ -342,9 +380,13 @@ Describe 'Workspace surface contract' { $script:releaseCoreWorkflowContent | Should -Match 'lvie-cdev-workspace-installer\.exe' $script:releaseCoreWorkflowContent | Should -Match 'Build-RunnerCliBundleFromManifest\.ps1' $script:releaseCoreWorkflowContent | Should -Match 'gh release upload' + 
$script:releaseCoreWorkflowContent | Should -Match 'Write-ReleaseManifest\.ps1' + $script:releaseCoreWorkflowContent | Should -Match 'Set-AuthenticodeSignature' + $script:releaseCoreWorkflowContent | Should -Match 'release-manifest\.json' $script:releaseCoreWorkflowContent | Should -Match 'workspace-installer\.spdx\.json' $script:releaseCoreWorkflowContent | Should -Match 'workspace-installer\.slsa\.json' $script:releaseWithGateWorkflowContent | Should -Match 'allow_gate_override:' + $script:releaseWithGateWorkflowContent | Should -Match 'release_channel:' $script:releaseWithGateWorkflowContent | Should -Match 'uses:\s*\./\.github/workflows/_windows-labview-image-gate-core\.yml' $script:releaseWithGateWorkflowContent | Should -Match 'uses:\s*\./\.github/workflows/_linux-labview-image-gate-core\.yml' } diff --git a/workspace-governance-payload/tools/cdev-cli/cli-contract.json b/workspace-governance-payload/tools/cdev-cli/cli-contract.json index e3c8c6c..4b3e531 100644 --- a/workspace-governance-payload/tools/cdev-cli/cli-contract.json +++ b/workspace-governance-payload/tools/cdev-cli/cli-contract.json @@ -5,10 +5,10 @@ "help": ["", ""], "repos": ["list", "doctor"], "surface": ["sync"], - "installer": ["build", "exercise", "install"], + "installer": ["build", "exercise", "install", "upgrade", "rollback", "status", "policy-validate"], "postactions": ["collect"], "linux": ["install", "deploy-ni"], "ci": ["integration-gate"], - "release": ["package"] + "release": ["package", "install", "upgrade", "rollback", "status"] } } diff --git a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 index eef4f4a..0068dab 100644 --- a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 +++ b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 @@ -96,6 +96,9 @@ $installerContractMembers = if ($null -ne 
$installerContract) { @($installerCont Add-Check -Scope 'manifest' -Name 'has_installer_contract_reproducibility' -Passed ($installerContractMembers -contains 'reproducibility') -Detail 'installer_contract.reproducibility' Add-Check -Scope 'manifest' -Name 'has_installer_contract_provenance' -Passed ($installerContractMembers -contains 'provenance') -Detail 'installer_contract.provenance' Add-Check -Scope 'manifest' -Name 'has_installer_contract_canary' -Passed ($installerContractMembers -contains 'canary') -Detail 'installer_contract.canary' +Add-Check -Scope 'manifest' -Name 'has_installer_contract_cli_bundle' -Passed ($installerContractMembers -contains 'cli_bundle') -Detail 'installer_contract.cli_bundle' +Add-Check -Scope 'manifest' -Name 'has_installer_contract_harness' -Passed ($installerContractMembers -contains 'harness') -Detail 'installer_contract.harness' +Add-Check -Scope 'manifest' -Name 'has_installer_contract_release_client' -Passed ($installerContractMembers -contains 'release_client') -Detail 'installer_contract.release_client' if ($installerContractMembers -contains 'reproducibility') { Add-Check -Scope 'manifest' -Name 'reproducibility_required_true' -Passed ([bool]$manifest.installer_contract.reproducibility.required) -Detail "required=$($manifest.installer_contract.reproducibility.required)" Add-Check -Scope 'manifest' -Name 'reproducibility_strict_hash_match_true' -Passed ([bool]$manifest.installer_contract.reproducibility.strict_hash_match) -Detail "strict_hash_match=$($manifest.installer_contract.reproducibility.strict_hash_match)" @@ -109,6 +112,55 @@ if ($installerContractMembers -contains 'canary') { Add-Check -Scope 'manifest' -Name 'canary_has_schedule' -Passed (-not [string]::IsNullOrWhiteSpace([string]$manifest.installer_contract.canary.schedule_cron_utc)) -Detail ([string]$manifest.installer_contract.canary.schedule_cron_utc) Add-Check -Scope 'manifest' -Name 'canary_linux_context' -Passed 
([string]$manifest.installer_contract.canary.docker_context -eq 'desktop-linux') -Detail ([string]$manifest.installer_contract.canary.docker_context) } +if ($installerContractMembers -contains 'cli_bundle') { + $cliBundle = $manifest.installer_contract.cli_bundle + Add-Check -Scope 'manifest' -Name 'cli_bundle_repo' -Passed ([string]$cliBundle.repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$cliBundle.repo) + Add-Check -Scope 'manifest' -Name 'cli_bundle_asset_win' -Passed ([string]$cliBundle.asset_win -eq 'cdev-cli-win-x64.zip') -Detail ([string]$cliBundle.asset_win) + Add-Check -Scope 'manifest' -Name 'cli_bundle_asset_linux' -Passed ([string]$cliBundle.asset_linux -eq 'cdev-cli-linux-x64.tar.gz') -Detail ([string]$cliBundle.asset_linux) + Add-Check -Scope 'manifest' -Name 'cli_bundle_asset_win_sha256' -Passed ([regex]::IsMatch(([string]$cliBundle.asset_win_sha256).ToLowerInvariant(), '^[0-9a-f]{64}$')) -Detail ([string]$cliBundle.asset_win_sha256) + Add-Check -Scope 'manifest' -Name 'cli_bundle_asset_linux_sha256' -Passed ([regex]::IsMatch(([string]$cliBundle.asset_linux_sha256).ToLowerInvariant(), '^[0-9a-f]{64}$')) -Detail ([string]$cliBundle.asset_linux_sha256) + Add-Check -Scope 'manifest' -Name 'cli_bundle_entrypoint_win' -Passed ([string]$cliBundle.entrypoint_win -eq 'tools\cdev-cli\win-x64\cdev-cli\scripts\Invoke-CdevCli.ps1') -Detail ([string]$cliBundle.entrypoint_win) + Add-Check -Scope 'manifest' -Name 'cli_bundle_entrypoint_linux' -Passed ([string]$cliBundle.entrypoint_linux -eq 'tools/cdev-cli/linux-x64/cdev-cli/scripts/Invoke-CdevCli.ps1') -Detail ([string]$cliBundle.entrypoint_linux) +} +if ($installerContractMembers -contains 'harness') { + $harness = $manifest.installer_contract.harness + Add-Check -Scope 'manifest' -Name 'harness_workflow_name' -Passed ([string]$harness.workflow_name -eq 'installer-harness-self-hosted.yml') -Detail ([string]$harness.workflow_name) + Add-Check -Scope 'manifest' -Name 'harness_trigger_mode' 
-Passed ([string]$harness.trigger_mode -eq 'integration_branch_push_and_dispatch') -Detail ([string]$harness.trigger_mode) + foreach ($label in @('self-hosted', 'windows', 'self-hosted-windows-lv')) { + Add-Check -Scope 'manifest' -Name "harness_runner_label:$label" -Passed (@($harness.runner_labels) -contains $label) -Detail ([string]::Join(',', @($harness.runner_labels))) + } + foreach ($requiredReport in @('iteration-summary.json', 'exercise-report.json', 'C:\dev-smoke-lvie\artifacts\workspace-install-latest.json', 'lvie-cdev-workspace-installer-bundle.zip', 'harness-validation-report.json')) { + Add-Check -Scope 'manifest' -Name "harness_required_report:$requiredReport" -Passed (@($harness.required_reports) -contains $requiredReport) -Detail ([string]::Join(',', @($harness.required_reports))) + } + foreach ($requiredPostaction in @('ppl_capability_checks.32', 'ppl_capability_checks.64', 'vip_package_build_check')) { + Add-Check -Scope 'manifest' -Name "harness_required_postaction:$requiredPostaction" -Passed (@($harness.required_postactions) -contains $requiredPostaction) -Detail ([string]::Join(',', @($harness.required_postactions))) + } +} +if ($installerContractMembers -contains 'release_client') { + $releaseClient = $manifest.installer_contract.release_client + Add-Check -Scope 'manifest' -Name 'release_client_schema_version' -Passed ([string]$releaseClient.schema_version -eq '1.0') -Detail ([string]$releaseClient.schema_version) + Add-Check -Scope 'manifest' -Name 'release_client_default_install_root' -Passed ([string]$releaseClient.default_install_root -eq 'C:\dev') -Detail ([string]$releaseClient.default_install_root) + Add-Check -Scope 'manifest' -Name 'release_client_policy_path' -Passed ([string]$releaseClient.policy_path -eq 'C:\dev\workspace-governance\release-policy.json') -Detail ([string]$releaseClient.policy_path) + Add-Check -Scope 'manifest' -Name 'release_client_state_path' -Passed ([string]$releaseClient.state_path -eq 
'C:\dev\artifacts\workspace-release-state.json') -Detail ([string]$releaseClient.state_path) + Add-Check -Scope 'manifest' -Name 'release_client_latest_report_path' -Passed ([string]$releaseClient.latest_report_path -eq 'C:\dev\artifacts\workspace-release-client-latest.json') -Detail ([string]$releaseClient.latest_report_path) + Add-Check -Scope 'manifest' -Name 'release_client_provenance_required' -Passed ([bool]$releaseClient.provenance_required) -Detail ([string]$releaseClient.provenance_required) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_repo_upstream' -Passed (@($releaseClient.allowed_repositories) -contains 'LabVIEW-Community-CI-CD/labview-cdev-surface') -Detail ([string]::Join(',', @($releaseClient.allowed_repositories))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_repo_fork' -Passed (@($releaseClient.allowed_repositories) -contains 'svelderrainruiz/labview-cdev-surface') -Detail ([string]::Join(',', @($releaseClient.allowed_repositories))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_stable' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'stable') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_prerelease' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'prerelease') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_allowed_channel_canary' -Passed (@($releaseClient.channel_rules.allowed_channels) -contains 'canary') -Detail ([string]::Join(',', @($releaseClient.channel_rules.allowed_channels))) + Add-Check -Scope 'manifest' -Name 'release_client_default_channel' -Passed ([string]$releaseClient.channel_rules.default_channel -eq 'stable') -Detail ([string]$releaseClient.channel_rules.default_channel) + Add-Check -Scope 'manifest' -Name 'release_client_signature_provider' -Passed 
([string]$releaseClient.signature_policy.provider -eq 'authenticode') -Detail ([string]$releaseClient.signature_policy.provider) + Add-Check -Scope 'manifest' -Name 'release_client_signature_mode' -Passed ([string]$releaseClient.signature_policy.mode -eq 'dual-mode-transition') -Detail ([string]$releaseClient.signature_policy.mode) + Add-Check -Scope 'manifest' -Name 'release_client_signature_dual_mode_start' -Passed (([DateTime]$releaseClient.signature_policy.dual_mode_start_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-03-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.dual_mode_start_utc) + Add-Check -Scope 'manifest' -Name 'release_client_signature_canary_enforce' -Passed (([DateTime]$releaseClient.signature_policy.canary_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-05-15T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.canary_enforce_utc) + Add-Check -Scope 'manifest' -Name 'release_client_signature_grace_end' -Passed (([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.signature_policy.grace_end_utc) + Add-Check -Scope 'manifest' -Name 'release_client_upgrade_allow_major' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_major_upgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_major_upgrade) + Add-Check -Scope 'manifest' -Name 'release_client_upgrade_allow_downgrade' -Passed (-not [bool]$releaseClient.upgrade_policy.allow_downgrade) -Detail ([string]$releaseClient.upgrade_policy.allow_downgrade) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_primary' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_mirror' -Passed ([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 
'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo) + Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy) +} $requiredSchemaFields = @( 'path', diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 0b09cc5..f122468 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -203,6 +203,48 @@ ], "linux_parity_override_image": "nationalinstruments/labview:2025q3-linux@sha256:9938561c6460841674f9b1871d8562242f51fe9fb72a2c39c66608491edf429c" } + }, + "release_client": { + "schema_version": "1.0", + "allowed_repositories": [ + "LabVIEW-Community-CI-CD/labview-cdev-surface", + "svelderrainruiz/labview-cdev-surface" + ], + "channel_rules": { + "default_channel": "stable", + "allowed_channels": [ + "stable", + "prerelease", + "canary" + ], + "prerelease_requires_opt_in": true, + "canary_requires_opt_in": true, + "canary_tag_regex": "(?i)canary" + }, + "signature_policy": { + "provider": "authenticode", + "mode": "dual-mode-transition", + "dual_mode_start_utc": "2026-03-15T00:00:00Z", + "canary_enforce_utc": "2026-05-15T00:00:00Z", + "grace_end_utc": "2026-07-01T00:00:00Z", + "require_timestamp": true, + "allow_unsigned_before_dual_mode_start": true + }, + "provenance_required": true, + "default_install_root": "C:\\dev", + "upgrade_policy": { + "allow_downgrade": false, + "allow_major_upgrade": false, + "allow_prerelease_upgrade_without_opt_in": false + }, + "state_path": "C:\\dev\\artifacts\\workspace-release-state.json", + "latest_report_path": "C:\\dev\\artifacts\\workspace-release-client-latest.json", + "policy_path": 
"C:\\dev\\workspace-governance\\release-policy.json", + "cdev_cli_sync": { + "primary_repo": "svelderrainruiz/labview-cdev-cli", + "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", + "strategy": "fork-and-upstream-full-sync" + } } }, "managed_repos": [ @@ -503,3 +545,4 @@ } ] } + diff --git a/workspace-governance.json b/workspace-governance.json index 0b09cc5..f122468 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -203,6 +203,48 @@ ], "linux_parity_override_image": "nationalinstruments/labview:2025q3-linux@sha256:9938561c6460841674f9b1871d8562242f51fe9fb72a2c39c66608491edf429c" } + }, + "release_client": { + "schema_version": "1.0", + "allowed_repositories": [ + "LabVIEW-Community-CI-CD/labview-cdev-surface", + "svelderrainruiz/labview-cdev-surface" + ], + "channel_rules": { + "default_channel": "stable", + "allowed_channels": [ + "stable", + "prerelease", + "canary" + ], + "prerelease_requires_opt_in": true, + "canary_requires_opt_in": true, + "canary_tag_regex": "(?i)canary" + }, + "signature_policy": { + "provider": "authenticode", + "mode": "dual-mode-transition", + "dual_mode_start_utc": "2026-03-15T00:00:00Z", + "canary_enforce_utc": "2026-05-15T00:00:00Z", + "grace_end_utc": "2026-07-01T00:00:00Z", + "require_timestamp": true, + "allow_unsigned_before_dual_mode_start": true + }, + "provenance_required": true, + "default_install_root": "C:\\dev", + "upgrade_policy": { + "allow_downgrade": false, + "allow_major_upgrade": false, + "allow_prerelease_upgrade_without_opt_in": false + }, + "state_path": "C:\\dev\\artifacts\\workspace-release-state.json", + "latest_report_path": "C:\\dev\\artifacts\\workspace-release-client-latest.json", + "policy_path": "C:\\dev\\workspace-governance\\release-policy.json", + "cdev_cli_sync": { + "primary_repo": "svelderrainruiz/labview-cdev-cli", + "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", + "strategy": "fork-and-upstream-full-sync" + } } }, "managed_repos": [ @@ -503,3 +545,4 @@ 
} ] } + From 589b7f8851640313cfef1ea0a1209ae6d91fc71a Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 14:34:55 -0800 Subject: [PATCH 02/60] fix(release): align release manifest repro asset with published name --- .../workflows/_release-workspace-installer-core.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 870ad74..a9562d8 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -250,6 +250,11 @@ jobs: -OutputPath (Join-Path $provRoot 'provenance-contract-report.json') if ($LASTEXITCODE -ne 0) { throw "Provenance contract validation failed." } + $reproReleasePath = Join-Path $releaseRoot 'reproducibility-report.json' + Copy-Item -LiteralPath (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') -Destination $reproReleasePath -Force + Copy-Item -LiteralPath (Join-Path $provRoot 'workspace-installer.spdx.json') -Destination (Join-Path $releaseRoot 'workspace-installer.spdx.json') -Force + Copy-Item -LiteralPath (Join-Path $provRoot 'workspace-installer.slsa.json') -Destination (Join-Path $releaseRoot 'workspace-installer.slsa.json') -Force + $releaseManifestPath = Join-Path $releaseRoot 'release-manifest.json' & pwsh -NoProfile -File (Join-Path $env:GITHUB_WORKSPACE 'scripts/Write-ReleaseManifest.ps1') ` -Repository '${{ github.repository }}' ` @@ -260,7 +265,7 @@ jobs: -InstallerShaPath (Join-Path $releaseRoot "$assetName.sha256") ` -SpdxPath (Join-Path $provRoot 'workspace-installer.spdx.json') ` -SlsaPath (Join-Path $provRoot 'workspace-installer.slsa.json') ` - -ReproducibilityPath (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') ` + -ReproducibilityPath $reproReleasePath ` -OutputPath $releaseManifestPath ` -PublishedAtUtc ((Get-Date).ToUniversalTime().ToString('o')) ` 
-SignatureStatus $signatureStatus ` @@ -286,10 +291,6 @@ jobs: signature_timestamp_utc = $signatureTimestampUtc } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $metadataPath -Encoding utf8 - Copy-Item -LiteralPath (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') -Destination (Join-Path $releaseRoot 'reproducibility-report.json') -Force - Copy-Item -LiteralPath (Join-Path $provRoot 'workspace-installer.spdx.json') -Destination (Join-Path $releaseRoot 'workspace-installer.spdx.json') -Force - Copy-Item -LiteralPath (Join-Path $provRoot 'workspace-installer.slsa.json') -Destination (Join-Path $releaseRoot 'workspace-installer.slsa.json') -Force - "asset_name=$assetName" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 "asset_sha256=$assetSha" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 From 1890b543081d15d245d90bf92ec98ae9d648f78a Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 14:57:36 -0800 Subject: [PATCH 03/60] ci(release): fail fast when no eligible self-hosted runner is online --- .../_release-workspace-installer-core.yml | 78 +++++++++++++++++++ ...orkspaceInstallerReleaseContract.Tests.ps1 | 7 ++ 2 files changed, 85 insertions(+) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index a9562d8..c02625b 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -42,8 +42,86 @@ permissions: contents: write jobs: + runner_preflight: + name: Release Runner Availability Preflight + runs-on: ubuntu-latest + outputs: + reason_code: ${{ steps.check.outputs.reason_code }} + steps: + - id: check + name: Validate eligible self-hosted release runner availability + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + + $repo = [string]'${{ github.repository }}' + $requiredLabels = @('self-hosted', 
'windows', 'self-hosted-windows-lv') + $reportPath = Join-Path $env:RUNNER_TEMP 'release-runner-availability-preflight.json' + + $runnersJson = & gh api "repos/$repo/actions/runners?per_page=100" 2>&1 + if ($LASTEXITCODE -ne 0) { + throw "Failed to list runners for '$repo'. $([string]::Join("`n", @($runnersJson)))" + } + + $runnerPayload = $runnersJson | ConvertFrom-Json -ErrorAction Stop + $onlineRunners = @() + $eligibleRunners = @() + foreach ($runner in @($runnerPayload.runners)) { + if ([string]$runner.status -ne 'online') { + continue + } + + $onlineRunners += [string]$runner.name + $runnerLabels = @{} + foreach ($label in @($runner.labels)) { + $runnerLabels[[string]$label.name.ToLowerInvariant()] = $true + } + + $missingLabels = @($requiredLabels | Where-Object { -not $runnerLabels.ContainsKey($_) }) + if ($missingLabels.Count -eq 0) { + $eligibleRunners += [ordered]@{ + name = [string]$runner.name + labels = @($runner.labels | ForEach-Object { [string]$_.name }) + } + } + } + + $report = [ordered]@{ + schema_version = '1.0' + repository = $repo + generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + required_labels = $requiredLabels + online_runners = $onlineRunners + eligible_runners = $eligibleRunners + status = if ($eligibleRunners.Count -gt 0) { 'pass' } else { 'fail' } + reason_code = if ($eligibleRunners.Count -gt 0) { 'ok' } else { 'runner_unavailable' } + remediation = 'Register at least one online self-hosted runner with labels self-hosted, windows, self-hosted-windows-lv.' + } + + $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 + + if ($eligibleRunners.Count -gt 0) { + "reason_code=ok" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + Write-Host "Runner preflight passed. Eligible runners: $($eligibleRunners.Count)." 
+ exit 0 + } + + "reason_code=runner_unavailable" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + throw "[runner_unavailable] No online runner matched required labels ($($requiredLabels -join ', ')). Remediation: $($report.remediation)" + + - name: Upload runner availability preflight report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-runner-availability-preflight-${{ github.run_id }} + path: ${{ runner.temp }}/release-runner-availability-preflight.json + if-no-files-found: error + package: name: Package Workspace Installer + needs: [runner_preflight] runs-on: [self-hosted, windows, self-hosted-windows-lv] outputs: asset_name: ${{ steps.package.outputs.asset_name }} diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 755e391..7dac9ac 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -46,7 +46,14 @@ Describe 'Workspace installer release workflow contract' { } It 'defines package and publish jobs with release asset upload' { + $script:coreWorkflowContent | Should -Match 'name:\s*Release Runner Availability Preflight' + $script:coreWorkflowContent | Should -Match 'Validate eligible self-hosted release runner availability' + $script:coreWorkflowContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' + $script:coreWorkflowContent | Should -Match 'reason_code=runner_unavailable' + $script:coreWorkflowContent | Should -Match '\[runner_unavailable\]' + $script:coreWorkflowContent | Should -Match 'release-runner-availability-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Package Workspace Installer' + $script:coreWorkflowContent | Should -Match 'needs:\s*\[runner_preflight\]' $script:coreWorkflowContent | Should -Match 'name:\s*Publish GitHub Release Asset' $script:coreWorkflowContent | Should -Match 'Release preflight - verify 
icon-editor upstream pin freshness' $script:coreWorkflowContent | Should -Match 'repos/LabVIEW-Community-CI-CD/labview-icon-editor/branches/develop' From 0039a5880729c874ca30dee6cb92980f25896ca7 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 15:42:10 -0800 Subject: [PATCH 04/60] feat(ops): add autonomous release control plane and ops hardening (#3) Co-authored-by: svelderrainruiz --- .../_release-workspace-installer-core.yml | 74 ++- .../workflows/canary-smoke-tag-hygiene.yml | 77 +++ .github/workflows/ci.yml | 8 +- .github/workflows/ops-autoremediate.yml | 99 ++++ .github/workflows/ops-monitoring.yml | 105 ++++ .github/workflows/release-control-plane.yml | 159 ++++++ .../workflows/release-with-windows-gate.yml | 45 +- .github/workflows/weekly-ops-slo-report.yml | 54 +++ AGENTS.md | 34 ++ README.md | 33 ++ .../runbooks/release-ops-incident-response.md | 126 +++++ scripts/Invoke-CanarySmokeTagHygiene.ps1 | 177 +++++++ scripts/Invoke-OpsAutoRemediation.ps1 | 200 ++++++++ scripts/Invoke-OpsMonitoringSnapshot.ps1 | 244 ++++++++++ scripts/Invoke-ReleaseControlPlane.ps1 | 452 ++++++++++++++++++ scripts/Write-OpsSloReport.ps1 | 192 ++++++++ ...ySmokeTagHygieneWorkflowContract.Tests.ps1 | 47 ++ ...sAutoRemediationWorkflowContract.Tests.ps1 | 46 ++ tests/OpsMonitoringWorkflowContract.Tests.ps1 | 45 ++ ...easeControlPlaneWorkflowContract.Tests.ps1 | 55 +++ ...eWithWindowsGateWorkflowContract.Tests.ps1 | 10 +- tests/ScopeAOpsRunbookContract.Tests.ps1 | 42 ++ ...eklyOpsSloReportWorkflowContract.Tests.ps1 | 44 ++ ...orkspaceInstallerReleaseContract.Tests.ps1 | 13 +- 24 files changed, 2373 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/canary-smoke-tag-hygiene.yml create mode 100644 .github/workflows/ops-autoremediate.yml create mode 100644 .github/workflows/ops-monitoring.yml create mode 100644 .github/workflows/release-control-plane.yml create mode 100644 .github/workflows/weekly-ops-slo-report.yml create mode 100644 
docs/runbooks/release-ops-incident-response.md create mode 100644 scripts/Invoke-CanarySmokeTagHygiene.ps1 create mode 100644 scripts/Invoke-OpsAutoRemediation.ps1 create mode 100644 scripts/Invoke-OpsMonitoringSnapshot.ps1 create mode 100644 scripts/Invoke-ReleaseControlPlane.ps1 create mode 100644 scripts/Write-OpsSloReport.ps1 create mode 100644 tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 create mode 100644 tests/OpsAutoRemediationWorkflowContract.Tests.ps1 create mode 100644 tests/OpsMonitoringWorkflowContract.Tests.ps1 create mode 100644 tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 create mode 100644 tests/ScopeAOpsRunbookContract.Tests.ps1 create mode 100644 tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1 diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index c02625b..f734148 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -42,9 +42,49 @@ permissions: contents: write jobs: + ops_health_preflight: + name: Release Ops Health Preflight + runs-on: ubuntu-latest + outputs: + reason_code: ${{ steps.preflight.outputs.reason_code }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - id: preflight + name: Enforce ops health preflight + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-ops-health-preflight.json' + try { + & pwsh -NoProfile -File ./scripts/Invoke-OpsMonitoringSnapshot.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -RequiredRunnerLabels @('self-hosted', 'windows', 'self-hosted-windows-lv') ` + -OutputPath $reportPath + if ($LASTEXITCODE -ne 0) { + throw 'Ops monitoring snapshot returned non-zero exit.' 
+ } + "reason_code=ok" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + } catch { + "reason_code=ops_unhealthy" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + throw "[ops_unhealthy] $($_.Exception.Message)" + } + + - name: Upload ops health preflight report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-ops-health-preflight-${{ github.run_id }} + path: ${{ runner.temp }}/release-ops-health-preflight.json + if-no-files-found: error + runner_preflight: name: Release Runner Availability Preflight runs-on: ubuntu-latest + needs: [ops_health_preflight] outputs: reason_code: ${{ steps.check.outputs.reason_code }} steps: @@ -121,7 +161,7 @@ jobs: package: name: Package Workspace Installer - needs: [runner_preflight] + needs: [ops_health_preflight, runner_preflight] runs-on: [self-hosted, windows, self-hosted-windows-lv] outputs: asset_name: ${{ steps.package.outputs.asset_name }} @@ -214,6 +254,10 @@ jobs: $manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json -ErrorAction Stop $releaseArtifactRoot = [string]$manifest.installer_contract.release_build_contract.artifact_root $parityArtifactRoot = [string]$manifest.installer_contract.container_parity_contract.artifact_root + $signaturePolicy = $manifest.installer_contract.release_client.signature_policy + $signatureDualModeStartUtc = [DateTime]::Parse([string]$signaturePolicy.dual_mode_start_utc).ToUniversalTime() + $signatureCanaryEnforceUtc = [DateTime]::Parse([string]$signaturePolicy.canary_enforce_utc).ToUniversalTime() + $signatureGraceEndUtc = [DateTime]::Parse([string]$signaturePolicy.grace_end_utc).ToUniversalTime() if ([string]::IsNullOrWhiteSpace($releaseArtifactRoot)) { $releaseArtifactRoot = 'artifacts\release' } if ([string]::IsNullOrWhiteSpace($parityArtifactRoot)) { $parityArtifactRoot = 'artifacts\parity' } @@ -309,6 +353,29 @@ jobs: Write-Warning 'No code-signing certificate configured. Publishing unsigned installer metadata.' 
} + $nowUtc = (Get-Date).ToUniversalTime() + $signatureRequired = $false + $signatureEnforcementState = 'pre_dual_mode' + if ($nowUtc -ge $signatureDualModeStartUtc) { + $signatureEnforcementState = 'dual_mode_warning' + if ($releaseChannel -eq 'canary' -and $nowUtc -ge $signatureCanaryEnforceUtc) { + $signatureRequired = $true + $signatureEnforcementState = 'canary_enforced' + } elseif (($releaseChannel -eq 'stable' -or $releaseChannel -eq 'prerelease') -and $nowUtc -ge $signatureGraceEndUtc) { + $signatureRequired = $true + $signatureEnforcementState = 'stable_prerelease_enforced' + } + } + + if ($signatureStatus -ne 'signed_valid') { + if ($signatureRequired) { + throw "[signature_required] Channel '$releaseChannel' requires signed installer artifacts after policy cutoff. now_utc=$($nowUtc.ToString('o')) signature_status=$signatureStatus" + } + if ($nowUtc -ge $signatureDualModeStartUtc) { + Write-Warning "[signature_warning] Unsigned installer is allowed during dual-mode transition. 
channel=$releaseChannel dual_mode_start_utc=$($signatureDualModeStartUtc.ToString('o')) canary_enforce_utc=$($signatureCanaryEnforceUtc.ToString('o')) grace_end_utc=$($signatureGraceEndUtc.ToString('o'))" + } + } + $assetSha = (Get-FileHash -LiteralPath $assetPath -Algorithm SHA256).Hash.ToLowerInvariant() "{0} *{1}" -f $assetSha, $assetName | Set-Content -LiteralPath (Join-Path $releaseRoot "$assetName.sha256") -Encoding ascii @@ -367,6 +434,11 @@ jobs: signature_subject = $signatureSubject signature_thumbprint = $signatureThumbprint signature_timestamp_utc = $signatureTimestampUtc + signature_enforcement_state = $signatureEnforcementState + signature_required = $signatureRequired + signature_dual_mode_start_utc = $signatureDualModeStartUtc.ToString('o') + signature_canary_enforce_utc = $signatureCanaryEnforceUtc.ToString('o') + signature_grace_end_utc = $signatureGraceEndUtc.ToString('o') } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $metadataPath -Encoding utf8 "asset_name=$assetName" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 diff --git a/.github/workflows/canary-smoke-tag-hygiene.yml b/.github/workflows/canary-smoke-tag-hygiene.yml new file mode 100644 index 0000000..68294e3 --- /dev/null +++ b/.github/workflows/canary-smoke-tag-hygiene.yml @@ -0,0 +1,77 @@ +name: canary-smoke-tag-hygiene + +on: + schedule: + - cron: '35 4 * * *' + workflow_dispatch: + inputs: + target_date_utc: + description: UTC date key in YYYYMMDD format. Defaults to current UTC date when empty. + required: false + default: '' + type: string + keep_latest_n: + description: Number of latest canary smoke tags to keep for the target date. + required: false + default: '1' + type: string + apply_changes: + description: Apply deletions. Set false for dry-run. 
+ required: false + default: true + type: boolean + +permissions: + contents: write + +jobs: + canary-smoke-tag-hygiene: + name: Canary Smoke Tag Hygiene + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Keep latest canary smoke tag only + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'canary-smoke-tag-hygiene-report.json' + + $targetDate = [string]'${{ inputs.target_date_utc }}' + if ([string]::IsNullOrWhiteSpace($targetDate)) { + $targetDate = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') + } + + $keepLatestNText = [string]'${{ inputs.keep_latest_n }}' + $keepLatestN = 1 + if (-not [string]::IsNullOrWhiteSpace($keepLatestNText)) { + $parsedKeepLatestN = 0 + if (-not [int]::TryParse($keepLatestNText, [ref]$parsedKeepLatestN)) { + throw "keep_latest_n must be an integer. actual='$keepLatestNText'" + } + $keepLatestN = $parsedKeepLatestN + } + + $applyChangesText = [string]'${{ inputs.apply_changes }}' + $applyChanges = $true + if (-not [string]::IsNullOrWhiteSpace($applyChangesText)) { + $applyChanges = [System.Convert]::ToBoolean($applyChangesText) + } + + & pwsh -NoProfile -File ./scripts/Invoke-CanarySmokeTagHygiene.ps1 ` + -Repository '${{ github.repository }}' ` + -DateUtc $targetDate ` + -KeepLatestN $keepLatestN ` + -Delete:$applyChanges ` + -OutputPath $reportPath + + - name: Upload canary smoke tag hygiene report + if: always() + uses: actions/upload-artifact@v4 + with: + name: canary-smoke-tag-hygiene-report-${{ github.run_id }} + path: ${{ runner.temp }}/canary-smoke-tag-hygiene-report.json + if-no-files-found: error diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 649620c..0dbdbb3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,13 +65,19 @@ jobs: './tests/VsCodeTasksContract.Tests.ps1', './tests/UploadArtifactRetryCompositeContract.Tests.ps1', 
'./tests/InstallerHarnessWorkflowContract.Tests.ps1', + './tests/OpsMonitoringWorkflowContract.Tests.ps1', + './tests/OpsAutoRemediationWorkflowContract.Tests.ps1', + './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', + './tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1', + './tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1', './tests/NightlySupplyChainCanaryWorkflowContract.Tests.ps1', './tests/WindowsLabviewImageGateWorkflowContract.Tests.ps1', './tests/LinuxLabviewImageGateWorkflowContract.Tests.ps1', './tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1', './tests/DockerDesktopLinuxIterationContract.Tests.ps1', './tests/WorkspaceInstallerExerciseContract.Tests.ps1', - './tests/WorkspaceInstallerIterationContract.Tests.ps1' + './tests/WorkspaceInstallerIterationContract.Tests.ps1', + './tests/ScopeAOpsRunbookContract.Tests.ps1' ) -CI -Output Detailed workspace-installer-contract: diff --git a/.github/workflows/ops-autoremediate.yml b/.github/workflows/ops-autoremediate.yml new file mode 100644 index 0000000..801c975 --- /dev/null +++ b/.github/workflows/ops-autoremediate.yml @@ -0,0 +1,99 @@ +name: ops-autoremediate + +on: + schedule: + - cron: '45 * * * *' + workflow_dispatch: + inputs: + sync_guard_max_age_hours: + description: Maximum age in hours for latest successful sync-guard run. 
+ required: false + default: '12' + type: string + +permissions: + contents: read + actions: write + issues: write + +jobs: + ops-autoremediate: + name: Ops Auto-Remediation + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Execute deterministic auto-remediation + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-autoremediate-report.json' + $syncGuardAgeHoursText = [string]'${{ inputs.sync_guard_max_age_hours }}' + $syncGuardAgeHours = 12 + if (-not [string]::IsNullOrWhiteSpace($syncGuardAgeHoursText)) { + $parsed = 0 + if (-not [int]::TryParse($syncGuardAgeHoursText, [ref]$parsed)) { + throw "sync_guard_max_age_hours must be an integer. actual='$syncGuardAgeHoursText'" + } + $syncGuardAgeHours = $parsed + } + + & pwsh -NoProfile -File ./scripts/Invoke-OpsAutoRemediation.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -SyncGuardMaxAgeHours $syncGuardAgeHours ` + -OutputPath $reportPath + + - name: Upload auto-remediation report + if: always() + uses: actions/upload-artifact@v4 + with: + name: ops-autoremediate-report-${{ github.run_id }} + path: ${{ runner.temp }}/ops-autoremediate-report.json + if-no-files-found: error + + - name: Open or update auto-remediation incident issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Auto-Remediation Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-autoremediate-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops auto-remediation report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Ops auto-remediation failed. 
+ + - Run: $env:RUN_URL + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Surface repository: $($report.surface_repository) + - Sync guard repository: $($report.sync_guard_repository) + "@ + + $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 + if ($LASTEXITCODE -ne 0) { + throw 'Failed to query existing auto-remediation incident issue.' + } + $existing = @($existingJson | ConvertFrom-Json) + if ($existing.Count -gt 0) { + $number = [string]$existing[0].number + & gh issue comment $number -R $env:REPOSITORY --body $body + if ($LASTEXITCODE -ne 0) { + throw "Failed to append auto-remediation issue comment to #$number." + } + } else { + & gh issue create -R $env:REPOSITORY --title $title --body $body + if ($LASTEXITCODE -ne 0) { + throw 'Failed to create auto-remediation incident issue.' + } + } diff --git a/.github/workflows/ops-monitoring.yml b/.github/workflows/ops-monitoring.yml new file mode 100644 index 0000000..86499f2 --- /dev/null +++ b/.github/workflows/ops-monitoring.yml @@ -0,0 +1,105 @@ +name: ops-monitoring + +on: + schedule: + - cron: '15 * * * *' + workflow_dispatch: + inputs: + sync_guard_max_age_hours: + description: Maximum allowed age in hours for latest successful cdev-cli sync-guard run. 
+ required: false + default: '12' + type: string + +permissions: + contents: read + actions: read + issues: write + +jobs: + ops-monitoring: + name: Ops Monitoring Snapshot + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - id: snapshot + name: Collect runner and sync-guard health snapshot + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-monitoring-report.json' + $syncGuardAgeHoursText = [string]'${{ inputs.sync_guard_max_age_hours }}' + $syncGuardAgeHours = 12 + if (-not [string]::IsNullOrWhiteSpace($syncGuardAgeHoursText)) { + $parsed = 0 + if (-not [int]::TryParse($syncGuardAgeHoursText, [ref]$parsed)) { + throw "sync_guard_max_age_hours must be an integer. actual='$syncGuardAgeHoursText'" + } + $syncGuardAgeHours = $parsed + } + + & pwsh -NoProfile -File ./scripts/Invoke-OpsMonitoringSnapshot.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -SyncGuardMaxAgeHours $syncGuardAgeHours ` + -OutputPath $reportPath + + - name: Upload ops monitoring report + if: always() + uses: actions/upload-artifact@v4 + with: + name: ops-monitoring-report-${{ github.run_id }} + path: ${{ runner.temp }}/ops-monitoring-report.json + if-no-files-found: error + + - name: Open or update ops monitoring tracking issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Monitoring Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-monitoring-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops monitoring report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) 
+ $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $message = [string]$report.message + + $body = @" + Ops monitoring detected a failure. + + - Run: $env:RUN_URL + - Reason codes: $reasonCodeText + - Message: $message + - Surface repository: $($report.surface_repository) + - Sync guard repository: $($report.sync_guard.repository) + "@ + + $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 + if ($LASTEXITCODE -ne 0) { + throw 'Failed to query existing ops monitoring tracking issue.' + } + + $existing = @($existingJson | ConvertFrom-Json) + if ($existing.Count -gt 0) { + $number = [string]$existing[0].number + & gh issue comment $number -R $env:REPOSITORY --body $body + if ($LASTEXITCODE -ne 0) { + throw "Failed to append ops monitoring issue comment to #$number." + } + } else { + & gh issue create -R $env:REPOSITORY --title $title --body $body + if ($LASTEXITCODE -ne 0) { + throw 'Failed to create ops monitoring tracking issue.' + } + } diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml new file mode 100644 index 0000000..b59501e --- /dev/null +++ b/.github/workflows/release-control-plane.yml @@ -0,0 +1,159 @@ +name: release-control-plane + +on: + schedule: + - cron: '20 6 * * *' + workflow_dispatch: + inputs: + mode: + description: Control-plane mode. + required: false + default: FullCycle + type: choice + options: + - Validate + - CanaryCycle + - PromotePrerelease + - PromoteStable + - FullCycle + sync_guard_max_age_hours: + description: Maximum age in hours for latest successful sync-guard run. + required: false + default: '12' + type: string + auto_remediate: + description: Auto-remediate ops drift before release dispatch. + required: false + default: true + type: boolean + keep_latest_canary_n: + description: Number of canary smoke tags to keep per date. 
+ required: false + default: '1' + type: string + dry_run: + description: Run planning and health gates only; do not dispatch releases. + required: false + default: false + type: boolean + +permissions: + contents: write + actions: write + issues: write + +jobs: + release-control-plane: + name: Release Control Plane + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Execute autonomous release control plane + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + + $mode = [string]'${{ inputs.mode }}' + if ([string]::IsNullOrWhiteSpace($mode)) { + $mode = 'FullCycle' + } + + $syncGuardAgeHoursText = [string]'${{ inputs.sync_guard_max_age_hours }}' + $syncGuardAgeHours = 12 + if (-not [string]::IsNullOrWhiteSpace($syncGuardAgeHoursText)) { + $parsedSyncGuardAgeHours = 0 + if (-not [int]::TryParse($syncGuardAgeHoursText, [ref]$parsedSyncGuardAgeHours)) { + throw "sync_guard_max_age_hours must be an integer. actual='$syncGuardAgeHoursText'" + } + $syncGuardAgeHours = $parsedSyncGuardAgeHours + } + + $keepLatestCanaryNText = [string]'${{ inputs.keep_latest_canary_n }}' + $keepLatestCanaryN = 1 + if (-not [string]::IsNullOrWhiteSpace($keepLatestCanaryNText)) { + $parsedKeepLatestCanaryN = 0 + if (-not [int]::TryParse($keepLatestCanaryNText, [ref]$parsedKeepLatestCanaryN)) { + throw "keep_latest_canary_n must be an integer. 
actual='$keepLatestCanaryNText'" + } + $keepLatestCanaryN = $parsedKeepLatestCanaryN + } + + $autoRemediateText = [string]'${{ inputs.auto_remediate }}' + $autoRemediate = $true + if (-not [string]::IsNullOrWhiteSpace($autoRemediateText)) { + $autoRemediate = [System.Convert]::ToBoolean($autoRemediateText) + } + + $dryRunText = [string]'${{ inputs.dry_run }}' + $dryRun = $false + if (-not [string]::IsNullOrWhiteSpace($dryRunText)) { + $dryRun = [System.Convert]::ToBoolean($dryRunText) + } + + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseControlPlane.ps1 ` + -Repository '${{ github.repository }}' ` + -Branch 'main' ` + -Mode $mode ` + -SyncGuardMaxAgeHours $syncGuardAgeHours ` + -KeepLatestCanaryN $keepLatestCanaryN ` + -AutoRemediate:$autoRemediate ` + -DryRun:$dryRun ` + -OutputPath $reportPath + + - name: Upload release control plane report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-control-plane-report-${{ github.run_id }} + path: ${{ runner.temp }}/release-control-plane-report.json + if-no-files-found: error + + - name: Open or update release control plane incident issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Control Plane Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "Release control plane report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Release control plane failed. 
+ + - Run: $env:RUN_URL + - Mode: $($report.mode) + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Repository: $($report.repository) + "@ + + $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 + if ($LASTEXITCODE -ne 0) { + throw 'Failed to query existing release control plane incident issue.' + } + + $existing = @($existingJson | ConvertFrom-Json) + if ($existing.Count -gt 0) { + $number = [string]$existing[0].number + & gh issue comment $number -R $env:REPOSITORY --body $body + if ($LASTEXITCODE -ne 0) { + throw "Failed to append release control plane issue comment to #$number." + } + } else { + & gh issue create -R $env:REPOSITORY --title $title --body $body + if ($LASTEXITCODE -ne 0) { + throw 'Failed to create release control plane incident issue.' + } + } diff --git a/.github/workflows/release-with-windows-gate.yml b/.github/workflows/release-with-windows-gate.yml index 7d9477d..67be5a5 100644 --- a/.github/workflows/release-with-windows-gate.yml +++ b/.github/workflows/release-with-windows-gate.yml @@ -60,15 +60,49 @@ jobs: throw "Release orchestration is restricted to '$expectedRepo'. Current repository '$actualRepo' is not allowed." } + ops_health_gate: + name: Ops Health Gate + runs-on: ubuntu-latest + needs: [repo_guard] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Enforce ops health preflight + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-ops-health-gate-report.json' + try { + & pwsh -NoProfile -File ./scripts/Invoke-OpsMonitoringSnapshot.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -OutputPath $reportPath + if ($LASTEXITCODE -ne 0) { + throw 'ops snapshot failed.' 
+ } + } catch { + throw "[ops_unhealthy] $($_.Exception.Message)" + } + + - name: Upload ops health gate report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-ops-health-gate-${{ github.run_id }} + path: ${{ runner.temp }}/release-ops-health-gate-report.json + if-no-files-found: error + windows_gate: name: Windows Gate - needs: [repo_guard] + needs: [repo_guard, ops_health_gate] uses: ./.github/workflows/_windows-labview-image-gate-core.yml secrets: inherit linux_gate: name: Linux Gate - needs: [repo_guard] + needs: [repo_guard, ops_health_gate] uses: ./.github/workflows/_linux-labview-image-gate-core.yml secrets: inherit @@ -76,7 +110,7 @@ jobs: name: Gate Policy runs-on: ubuntu-latest if: ${{ always() }} - needs: [repo_guard, windows_gate, linux_gate] + needs: [repo_guard, ops_health_gate, windows_gate, linux_gate] outputs: override_applied: ${{ steps.evaluate.outputs.override_applied }} override_reason: ${{ steps.evaluate.outputs.override_reason }} @@ -87,6 +121,7 @@ jobs: shell: pwsh env: REPO_GUARD_RESULT: ${{ needs.repo_guard.result }} + OPS_HEALTH_GATE_RESULT: ${{ needs.ops_health_gate.result }} WINDOWS_GATE_RESULT: ${{ needs.windows_gate.result }} LINUX_GATE_RESULT: ${{ needs.linux_gate.result }} ALLOW_GATE_OVERRIDE: ${{ inputs.allow_gate_override }} @@ -96,6 +131,7 @@ jobs: $ErrorActionPreference = 'Stop' $repoGuardResult = [string]$env:REPO_GUARD_RESULT + $opsHealthGateResult = [string]$env:OPS_HEALTH_GATE_RESULT $windowsGateResult = [string]$env:WINDOWS_GATE_RESULT $linuxGateResult = [string]$env:LINUX_GATE_RESULT $allowOverride = [System.Convert]::ToBoolean([string]$env:ALLOW_GATE_OVERRIDE) @@ -106,6 +142,9 @@ jobs: if ($repoGuardResult -ne 'success') { throw "Repository guard did not succeed. Blocking release publish." } + if ($opsHealthGateResult -ne 'success') { + throw "Ops health gate did not succeed. Blocking release publish." 
+ } if ($windowsGateResult -eq 'success' -and $linuxGateResult -eq 'success') { Write-Host 'Windows and Linux gates passed. Release publish is allowed.' } else { diff --git a/.github/workflows/weekly-ops-slo-report.yml b/.github/workflows/weekly-ops-slo-report.yml new file mode 100644 index 0000000..6a3b3db --- /dev/null +++ b/.github/workflows/weekly-ops-slo-report.yml @@ -0,0 +1,54 @@ +name: weekly-ops-slo-report + +on: + schedule: + - cron: '10 8 * * 1' + workflow_dispatch: + inputs: + lookback_days: + description: Lookback window in days for SLO calculation. + required: false + default: '7' + type: string + +permissions: + contents: read + actions: read + +jobs: + weekly-ops-slo-report: + name: Weekly Ops SLO Report + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build weekly SLO report + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'weekly-ops-slo-report.json' + $lookbackDaysText = [string]'${{ inputs.lookback_days }}' + $lookbackDays = 7 + if (-not [string]::IsNullOrWhiteSpace($lookbackDaysText)) { + $parsed = 0 + if (-not [int]::TryParse($lookbackDaysText, [ref]$parsed)) { + throw "lookback_days must be an integer. 
actual='$lookbackDaysText'" + } + $lookbackDays = $parsed + } + + & pwsh -NoProfile -File ./scripts/Write-OpsSloReport.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -LookbackDays $lookbackDays ` + -OutputPath $reportPath + + - name: Upload weekly SLO report + if: always() + uses: actions/upload-artifact@v4 + with: + name: weekly-ops-slo-report-${{ github.run_id }} + path: ${{ runner.temp }}/weekly-ops-slo-report.json + if-no-files-found: error diff --git a/AGENTS.md b/AGENTS.md index dcd0ff5..2df70e3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -171,6 +171,40 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `nightly-supplychain-canary.yml` is the scheduled drift and reproducibility signal. - Canary failures must update a single tracking issue; do not disable canary to bypass failures. +## Ops Monitoring Policy +- `.github/workflows/ops-monitoring.yml` is the authoritative hourly ops snapshot workflow. +- It must run `scripts/Invoke-OpsMonitoringSnapshot.ps1` and fail with deterministic reason codes when runner or sync-guard health drifts. +- Ops snapshot reason codes must remain explicit: + - `runner_unavailable` + - `sync_guard_failed` + - `sync_guard_stale` + - `sync_guard_missing` + - `sync_guard_incomplete` +- Failure path must upload `ops-monitoring-report.json` and update a single issue titled `Ops Monitoring Alert`. +- `.github/workflows/canary-smoke-tag-hygiene.yml` is the canary smoke tag retention workflow. +- It must run `scripts/Invoke-CanarySmokeTagHygiene.ps1` and enforce deterministic keep-latest behavior for tags matching `v0.YYYYMMDD.N`. +- `.github/workflows/ops-autoremediate.yml` is the deterministic remediation workflow and must run `scripts/Invoke-OpsAutoRemediation.ps1`. 
+- Auto-remediation reason codes must remain explicit: + - `already_healthy` + - `remediated` + - `manual_intervention_required` + - `no_automatable_action` + - `remediation_failed` +- `.github/workflows/release-control-plane.yml` is the autonomous release orchestrator and must run `scripts/Invoke-ReleaseControlPlane.ps1`. +- Control-plane mode contract: + - `Validate` + - `CanaryCycle` + - `PromotePrerelease` + - `PromoteStable` + - `FullCycle` +- Channel tag windows are deterministic for `v0.YYYYMMDD.N`: + - canary: `1-49` + - prerelease: `50-79` + - stable: `80-99` +- Promotion must gate on source release integrity (required assets + source commit equals branch head). +- `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. +- Operational incident handling runbook is `docs/runbooks/release-ops-incident-response.md`. + ## Integration Gate Policy - `.github/workflows/integration-gate.yml` is the integration-branch aggregator workflow. - It must gate on required contexts: `CI Pipeline`, `Workspace Installer Contract`, `Reproducibility Contract`, `Provenance Contract`. diff --git a/README.md b/README.md index 27200c3..eb43bbf 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,39 @@ Fork/upstream cdev-cli synchronization policy starts with full sync metadata: Release channel metadata can be set during publish with workflow input `release_channel` (`stable`, `prerelease`, `canary`). +## Ops monitoring and hygiene + +`ops-monitoring.yml` is scheduled hourly and supports manual dispatch. It runs `scripts/Invoke-OpsMonitoringSnapshot.ps1` and fails on: +- runner availability drift (`runner_unavailable`) +- cdev-cli sync-guard drift/failure (`sync_guard_failed`, `sync_guard_stale`, `sync_guard_missing`, `sync_guard_incomplete`) + +Every run uploads `ops-monitoring-report.json`. On failure, automation updates a single tracking issue (`Ops Monitoring Alert`). 
+ +`canary-smoke-tag-hygiene.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-CanarySmokeTagHygiene.ps1` to keep latest `v0.YYYYMMDD.N` canary smoke tag(s) for a UTC date and delete older tags deterministically. + +`ops-autoremediate.yml` is scheduled hourly and supports manual dispatch. It runs `scripts/Invoke-OpsAutoRemediation.ps1` to: +- auto-dispatch and verify cdev-cli sync-guard when sync drift is detected +- re-evaluate health after remediation +- fail with deterministic reason codes when manual intervention is still required + +`release-control-plane.yml` is the autonomous orchestrator. It runs `scripts/Invoke-ReleaseControlPlane.ps1` with modes: +- `CanaryCycle` +- `PromotePrerelease` +- `PromoteStable` +- `FullCycle` +- `Validate` + +Control-plane behavior: +1. Runs ops health gate and optional auto-remediation. +2. Dispatches release workflow with deterministic channel-specific tag windows (`canary=1-49`, `prerelease=50-79`, `stable=80-99` for `v0.YYYYMMDD.N`). +3. Verifies run completion. +4. Applies canary smoke tag hygiene after canary publish. + +`weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. + +Runbook for incidents: +- `docs/runbooks/release-ops-incident-response.md` + ## Nightly canary `nightly-supplychain-canary.yml` runs on a nightly schedule and on demand. 
It executes: diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md new file mode 100644 index 0000000..8e00417 --- /dev/null +++ b/docs/runbooks/release-ops-incident-response.md @@ -0,0 +1,126 @@ +# Release Ops Incident Response Runbook + +## Purpose +Deterministic operator response for Scope A hardening controls: +- runner availability monitoring +- cdev-cli fork/upstream sync-guard monitoring +- canary smoke tag hygiene + +## Inputs +- Surface repository: `LabVIEW-Community-CI-CD/labview-cdev-surface-fork` +- Sync-guard repository: `LabVIEW-Community-CI-CD/labview-cdev-cli` +- Runner root (service mode): `D:\dev\gh-runner-surface-fork` + +## Triage +1. Open latest `ops-monitoring` run and inspect `ops-monitoring-report-` artifact. +2. Read `reason_codes`. +3. Execute remediation by reason code. +4. If remediation is automatable, dispatch `ops-autoremediate.yml` first and re-check health. + +Reason code mapping: +- `runner_unavailable`: no online self-hosted runner matched required labels. +- `sync_guard_failed`: latest completed cdev-cli sync-guard run failed. +- `sync_guard_stale`: latest successful sync-guard run exceeded max-age policy. +- `sync_guard_missing`: no sync-guard run found for branch. +- `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. + +## Runner Unavailable Remediation +1. Verify repository runner state: + +```powershell +gh api repos/LabVIEW-Community-CI-CD/labview-cdev-surface-fork/actions/runners ` + --jq '.runners[] | {name,status,busy,labels:(.labels|map(.name))}' +``` + +2. On runner host, verify service is running and automatic: + +```powershell +Get-Service -Name 'actions.runner.LabVIEW-Community-CI-CD-labview-cdev-surface-fork*' | + Select-Object Name, Status, StartType +``` + +3. If stopped, restart: + +```powershell +Start-Service -Name 'actions.runner.LabVIEW-Community-CI-CD-labview-cdev-surface-fork*' +``` + +4. 
Re-run `ops-monitoring` by dispatch and confirm pass. + +## Sync Guard Drift Remediation +1. Dispatch upstream sync guard: + +```powershell +gh workflow run fork-upstream-sync-guard --repo LabVIEW-Community-CI-CD/labview-cdev-cli +``` + +2. Watch result: + +```powershell +gh run list --repo LabVIEW-Community-CI-CD/labview-cdev-cli --workflow fork-upstream-sync-guard --limit 1 +``` + +3. If it failed due to fork/upstream drift, run the controlled force-align from the cdev-cli repo: + +```powershell +Set-Location D:\dev\labview-cdev-cli +pwsh -File .\scripts\Invoke-ControlledForkForceAlign.ps1 +``` + +4. Re-check parity: + +```powershell +gh api repos/LabVIEW-Community-CI-CD/labview-cdev-cli/commits/main --jq .sha +gh api repos/svelderrainruiz/labview-cdev-cli/commits/main --jq .sha +``` + +5. Dispatch auto-remediation workflow (preferred control-plane path): + +```powershell +gh workflow run ops-autoremediate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +## Canary Smoke Tag Hygiene Remediation +Keep latest only for one UTC date key (`YYYYMMDD`): + +```powershell +Set-Location D:\dev\labview-cdev-surface-fork +pwsh -File .\scripts\Invoke-CanarySmokeTagHygiene.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -DateUtc 20260226 ` + -KeepLatestN 1 ` + -Delete +``` + +Dry-run before deletion: + +```powershell +pwsh -File .\scripts\Invoke-CanarySmokeTagHygiene.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -DateUtc 20260226 ` + -KeepLatestN 1 +``` + +## Autonomous Control Plane Dispatch +Run full autonomous cycle manually: + +```powershell +gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f mode=FullCycle ` + -f auto_remediate=true ` + -f dry_run=false +``` + +Run validation-only health/policy gate: + +```powershell +gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f mode=Validate ` + -f dry_run=true +``` + +## 
Evidence to Attach to Incident +- `ops-monitoring-report.json` +- `canary-smoke-tag-hygiene-report.json` +- sync guard run URL +- parity SHAs (upstream and fork) diff --git a/scripts/Invoke-CanarySmokeTagHygiene.ps1 b/scripts/Invoke-CanarySmokeTagHygiene.ps1 new file mode 100644 index 0000000..dc9b919 --- /dev/null +++ b/scripts/Invoke-CanarySmokeTagHygiene.ps1 @@ -0,0 +1,177 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[0-9]{8}$')] + [string]$DateUtc = (Get-Date).ToUniversalTime().ToString('yyyyMMdd'), + + [Parameter()] + [ValidateRange(1, 10)] + [int]$KeepLatestN = 1, + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string]$CanaryTagRegex = '^v0\.(?\d{8})\.(?\d+)$', + + [Parameter()] + [bool]$RequirePrerelease = $true, + + [Parameter()] + [ValidateRange(1, 100)] + [int]$MaxDeleteCount = 20, + + [Parameter()] + [switch]$Delete, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + repository = $Repository + target_date_utc = $DateUtc + canary_tag_regex = $CanaryTagRegex + require_prerelease = $RequirePrerelease + keep_latest_n = $KeepLatestN + delete_enabled = [bool]$Delete + max_delete_count = $MaxDeleteCount + status = 'fail' + reason_code = '' + message = '' + releases_scanned = 0 + candidate_count = 0 + kept_tags = @() + delete_candidates = @() + deleted_tags = @() +} + +try { + $releaseList = @(Invoke-GhJson -Arguments @( + 'release', 'list', + '-R', $Repository, + '--limit', '200', + '--exclude-drafts', + '--json', 'tagName,isPrerelease,publishedAt' + )) + $report.releases_scanned = @($releaseList).Count + + $candidates = @() + foreach ($release in $releaseList) { + $tagName = [string]$release.tagName + if ([string]::IsNullOrWhiteSpace($tagName)) { + continue + } + + $match = [regex]::Match($tagName, $CanaryTagRegex) + if (-not $match.Success) { + continue + } + + $tagDate = [string]$match.Groups['date'].Value + if ($tagDate -ne $DateUtc) { + continue + } + + $sequenceText = [string]$match.Groups['sequence'].Value + $sequence = 0 + if (-not [int]::TryParse($sequenceText, [ref]$sequence)) { + continue + } + + $isPrerelease = [bool]$release.isPrerelease + if ($RequirePrerelease -and -not $isPrerelease) { + continue + } + + $publishedAt = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$release.publishedAt, [ref]$publishedAt) + + $candidates += [ordered]@{ + tag_name = $tagName + sequence = $sequence + is_prerelease = $isPrerelease + published_at_utc = if ($publishedAt -eq [DateTimeOffset]::MinValue) { '' } else { $publishedAt.ToUniversalTime().ToString('o') } + } + } + + $orderedCandidates = @( + $candidates | Sort-Object ` + @{ Expression = { [int]$_.sequence }; Descending = $true }, ` + @{ Expression = { + $parsed = [DateTimeOffset]::MinValue + 
[void][DateTimeOffset]::TryParse([string]$_.published_at_utc, [ref]$parsed) + $parsed + }; Descending = $true }, ` + @{ Expression = { [string]$_.tag_name }; Descending = $false } + ) + + $report.candidate_count = @($orderedCandidates).Count + + if (@($orderedCandidates).Count -eq 0) { + $report.status = 'pass' + $report.reason_code = 'no_matching_tags' + $report.message = "No canary releases matched date '$DateUtc'." + } else { + $kept = @($orderedCandidates | Select-Object -First $KeepLatestN) + $deleteCandidates = @($orderedCandidates | Select-Object -Skip $KeepLatestN) + + $report.kept_tags = @($kept) + $report.delete_candidates = @($deleteCandidates) + + if (@($deleteCandidates).Count -gt $MaxDeleteCount) { + throw "delete_count_exceeds_guard: deleteCandidates=$(@($deleteCandidates).Count) max=$MaxDeleteCount" + } + + $deleted = @() + if ($Delete) { + foreach ($candidate in $deleteCandidates) { + Invoke-Gh -Arguments @( + 'release', 'delete', + [string]$candidate.tag_name, + '-R', $Repository, + '--yes', + '--cleanup-tag' + ) + + $deleted += [ordered]@{ + tag_name = [string]$candidate.tag_name + deleted_at_utc = Get-UtcNowIso + } + } + + $report.deleted_tags = @($deleted) + $report.status = 'pass' + $report.reason_code = 'applied' + $report.message = "Deleted $(@($deleted).Count) stale canary release tags for date '$DateUtc'." + } else { + $report.status = 'pass' + $report.reason_code = 'dry_run' + $report.message = "Dry-run only. $(@($deleteCandidates).Count) stale canary tags would be deleted for date '$DateUtc'." 
+ } + } +} +catch { + $report.status = 'fail' + $report.reason_code = 'hygiene_failed' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Invoke-OpsAutoRemediation.ps1 b/scripts/Invoke-OpsAutoRemediation.ps1 new file mode 100644 index 0000000..9571e15 --- /dev/null +++ b/scripts/Invoke-OpsAutoRemediation.ps1 @@ -0,0 +1,200 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SurfaceRepository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SyncGuardRepository = 'LabVIEW-Community-CI-CD/labview-cdev-cli', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$SyncGuardWorkflow = 'fork-upstream-sync-guard', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$SyncGuardBranch = 'main', + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [ValidateRange(5, 180)] + [int]$WatchTimeoutMinutes = 45, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$opsSnapshotScript = Join-Path $PSScriptRoot 'Invoke-OpsMonitoringSnapshot.ps1' +$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' +$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' + +foreach ($requiredScript in @($opsSnapshotScript, $dispatchWorkflowScript, $watchWorkflowScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +$tempRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("ops-auto-remediate-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $tempRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + surface_repository = $SurfaceRepository + sync_guard_repository = $SyncGuardRepository + sync_guard_workflow = $SyncGuardWorkflow + sync_guard_branch = $SyncGuardBranch + sync_guard_max_age_hours = $SyncGuardMaxAgeHours + status = 'fail' + reason_code = '' + message = '' + pre_health = $null + post_health = $null + actions = @() +} + +try { + $preHealthPath = Join-Path $tempRoot 'pre-health.json' + & pwsh -NoProfile -File $opsSnapshotScript ` + -SurfaceRepository $SurfaceRepository ` + -SyncGuardRepository $SyncGuardRepository ` + -SyncGuardWorkflow $SyncGuardWorkflow ` + -SyncGuardBranch $SyncGuardBranch ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $preHealthPath + if ($LASTEXITCODE -ne 0) { + throw "ops_snapshot_failed_pre: exit_code=$LASTEXITCODE" + } + $report.pre_health = Get-Content -LiteralPath $preHealthPath -Raw | ConvertFrom-Json -ErrorAction Stop + + if ([string]$report.pre_health.status -eq 'pass') { + $report.status = 'pass' + $report.reason_code = 'already_healthy' + $report.message = 'Ops health is already green. No remediation required.' 
+ } else { + $preReasons = @($report.pre_health.reason_codes | ForEach-Object { [string]$_ }) + $syncGuardReasons = @('sync_guard_failed', 'sync_guard_stale', 'sync_guard_missing', 'sync_guard_incomplete') + $hasAutomatableSyncGuardDrift = @($preReasons | Where-Object { $syncGuardReasons -contains $_ }).Count -gt 0 + + if ($hasAutomatableSyncGuardDrift) { + $dispatchReportPath = Join-Path $tempRoot 'sync-guard-dispatch.json' + & pwsh -NoProfile -File $dispatchWorkflowScript ` + -Repository $SyncGuardRepository ` + -WorkflowFile $SyncGuardWorkflow ` + -Branch $SyncGuardBranch ` + -OutputPath $dispatchReportPath + if ($LASTEXITCODE -ne 0) { + throw "sync_guard_dispatch_failed: exit_code=$LASTEXITCODE" + } + $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + + $watchReportPath = Join-Path $tempRoot 'sync-guard-watch.json' + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $SyncGuardRepository ` + -RunId ([string]$dispatchReport.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -OutputPath $watchReportPath + if ($LASTEXITCODE -ne 0) { + throw "sync_guard_watch_failed: exit_code=$LASTEXITCODE" + } + $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + + $report.actions = @( + [ordered]@{ + action = 'dispatch_sync_guard' + status = 'success' + run_id = [string]$dispatchReport.run_id + run_url = [string]$dispatchReport.url + }, + [ordered]@{ + action = 'watch_sync_guard' + status = [string]$watchReport.conclusion + run_id = [string]$watchReport.run_id + run_url = [string]$watchReport.url + } + ) + } else { + $report.actions = @( + [ordered]@{ + action = 'no_automatable_action' + status = 'skipped' + detail = "reason_codes=$([string]::Join(',', $preReasons))" + } + ) + } + + $postHealthPath = Join-Path $tempRoot 'post-health.json' + $postSucceeded = $false + try { + & pwsh -NoProfile -File $opsSnapshotScript ` + -SurfaceRepository $SurfaceRepository ` + 
-SyncGuardRepository $SyncGuardRepository ` + -SyncGuardWorkflow $SyncGuardWorkflow ` + -SyncGuardBranch $SyncGuardBranch ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $postHealthPath + if ($LASTEXITCODE -eq 0) { + $postSucceeded = $true + } + } catch { + $postSucceeded = $false + } + + if (Test-Path -LiteralPath $postHealthPath -PathType Leaf) { + $report.post_health = Get-Content -LiteralPath $postHealthPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if ($postSucceeded -and $null -ne $report.post_health -and [string]$report.post_health.status -eq 'pass') { + $report.status = 'pass' + $report.reason_code = 'remediated' + $report.message = 'Auto-remediation recovered ops health to pass.' + } else { + $postReasons = @() + if ($null -ne $report.post_health) { + $postReasons = @($report.post_health.reason_codes | ForEach-Object { [string]$_ }) + } + $manualRequired = (@($postReasons | Where-Object { $_ -eq 'runner_unavailable' }).Count -gt 0) -or + (@($preReasons | Where-Object { $_ -eq 'runner_unavailable' }).Count -gt 0) + if ($manualRequired) { + $report.status = 'fail' + $report.reason_code = 'manual_intervention_required' + $report.message = "Runner availability requires manual intervention. reason_codes=$([string]::Join(',', @($postReasons)))" + } elseif ($hasAutomatableSyncGuardDrift) { + $report.status = 'fail' + $report.reason_code = 'remediation_incomplete' + $report.message = "Auto-remediation attempted but health is still failing. reason_codes=$([string]::Join(',', @($postReasons)))" + } else { + $report.status = 'fail' + $report.reason_code = 'no_automatable_action' + $report.message = "No automatable action for current ops failure. 
reason_codes=$([string]::Join(',', @($preReasons)))" + } + } + } +} +catch { + $report.status = 'fail' + $report.reason_code = 'remediation_failed' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $tempRoot -PathType Container) { + Remove-Item -LiteralPath $tempRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Invoke-OpsMonitoringSnapshot.ps1 b/scripts/Invoke-OpsMonitoringSnapshot.ps1 new file mode 100644 index 0000000..f1a0e7d --- /dev/null +++ b/scripts/Invoke-OpsMonitoringSnapshot.ps1 @@ -0,0 +1,244 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SurfaceRepository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string[]]$RequiredRunnerLabels = @( + 'self-hosted', + 'windows', + 'self-hosted-windows-lv', + 'windows-containers', + 'user-session', + 'cdev-surface-windows-gate' + ), + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SyncGuardRepository = 'LabVIEW-Community-CI-CD/labview-cdev-cli', + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string]$SyncGuardWorkflow = 'fork-upstream-sync-guard', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$SyncGuardBranch = 'main', + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Convert-RunRecord { + param([Parameter(Mandatory = $true)][object]$Run) + + $runTimestamp = Get-RunTimestampUtc -Run $Run + return [ordered]@{ + run_id = [string]$Run.databaseId + status = [string]$Run.status + conclusion = [string]$Run.conclusion + event = [string]$Run.event + head_sha = [string]$Run.headSha + created_at_utc = if ($runTimestamp -eq [DateTimeOffset]::MinValue) { '' } else { $runTimestamp.ToString('o') } + url = [string]$Run.url + } +} + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +function Get-RunTimestampUtc { + param([Parameter(Mandatory = $true)][object]$Run) + + $created = $Run.createdAt + if ($created -is [DateTimeOffset]) { + return ([DateTimeOffset]$created).ToUniversalTime() + } + if ($created -is [DateTime]) { + return [DateTimeOffset]::new(([DateTime]$created).ToUniversalTime()) + } + + $createdText = ([string]$created).Trim() + if ([string]::IsNullOrWhiteSpace($createdText)) { + return [DateTimeOffset]::MinValue + } + + $parsed = [DateTimeOffset]::MinValue + if ([DateTimeOffset]::TryParse( + $createdText, + [System.Globalization.CultureInfo]::InvariantCulture, + [System.Globalization.DateTimeStyles]::AssumeUniversal, + [ref]$parsed)) { + return $parsed.ToUniversalTime() + } + + if ([DateTimeOffset]::TryParse($createdText, [ref]$parsed)) { + return $parsed.ToUniversalTime() + } + + return [DateTimeOffset]::MinValue +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() +$report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + surface_repository = $SurfaceRepository + required_runner_labels = @() + runner_summary = [ordered]@{ + total = 0 + online = 0 + eligible = 0 + } + eligible_runners = @() + sync_guard = [ordered]@{ 
+ repository = $SyncGuardRepository + workflow = $SyncGuardWorkflow + branch = $SyncGuardBranch + max_age_hours = $SyncGuardMaxAgeHours + latest_run = $null + latest_completed_run = $null + latest_success_run = $null + latest_success_age_hours = $null + } + status = 'fail' + reason_codes = @() + message = '' +} + +try { + $normalizedRequiredLabels = @( + @($RequiredRunnerLabels | ForEach-Object { ([string]$_).ToLowerInvariant().Trim() } | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) | + Sort-Object -Unique + ) + $report.required_runner_labels = $normalizedRequiredLabels + + $runnerPayload = Invoke-GhJson -Arguments @('api', "repos/$SurfaceRepository/actions/runners?per_page=100") + $allRunners = @($runnerPayload.runners) + $onlineRunners = @() + $eligibleRunners = @() + + foreach ($runner in $allRunners) { + $labelMap = @{} + foreach ($label in @($runner.labels)) { + $name = ([string]$label.name).ToLowerInvariant().Trim() + if (-not [string]::IsNullOrWhiteSpace($name)) { + $labelMap[$name] = $true + } + } + + $runnerRecord = [ordered]@{ + name = [string]$runner.name + status = [string]$runner.status + busy = [bool]$runner.busy + labels = @($runner.labels | ForEach-Object { [string]$_.name }) + missing_required_labels = @($normalizedRequiredLabels | Where-Object { -not $labelMap.ContainsKey($_) }) + } + + if ([string]$runner.status -eq 'online') { + $onlineRunners += $runnerRecord + if (@($runnerRecord.missing_required_labels).Count -eq 0) { + $eligibleRunners += $runnerRecord + } + } + } + + $report.runner_summary.total = @($allRunners).Count + $report.runner_summary.online = @($onlineRunners).Count + $report.runner_summary.eligible = @($eligibleRunners).Count + $report.eligible_runners = @($eligibleRunners) + + if (@($eligibleRunners).Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'runner_unavailable' + } + + $syncRunsRaw = @(Invoke-GhJson -Arguments @( + 'run', 'list', + '-R', $SyncGuardRepository, + '--workflow', $SyncGuardWorkflow, 
+ '--branch', $SyncGuardBranch, + '--limit', '25', + '--json', 'databaseId,status,conclusion,url,createdAt,headSha,event' + )) + $syncRuns = @($syncRunsRaw | Sort-Object { Get-RunTimestampUtc -Run $_ } -Descending) + + $latestRun = $null + if (@($syncRuns).Count -gt 0) { + $latestRun = $syncRuns[0] + $report.sync_guard.latest_run = Convert-RunRecord -Run $latestRun + } + + $latestCompletedRun = @($syncRuns | Where-Object { [string]$_.status -eq 'completed' } | Select-Object -First 1) + if (@($latestCompletedRun).Count -eq 1) { + $report.sync_guard.latest_completed_run = Convert-RunRecord -Run $latestCompletedRun[0] + if ([string]$latestCompletedRun[0].conclusion -ne 'success') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_failed' + } + } + + $latestSuccessRun = @($syncRuns | Where-Object { [string]$_.status -eq 'completed' -and [string]$_.conclusion -eq 'success' } | Select-Object -First 1) + if (@($latestSuccessRun).Count -ne 1) { + if (@($syncRuns).Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_missing' + } elseif (@($latestCompletedRun).Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_incomplete' + } else { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_failed' + } + } else { + $successRun = $latestSuccessRun[0] + $report.sync_guard.latest_success_run = Convert-RunRecord -Run $successRun + + $successTimestamp = Get-RunTimestampUtc -Run $successRun + $ageHours = [Math]::Round((((Get-Date).ToUniversalTime() - $successTimestamp.UtcDateTime).TotalHours), 2) + $report.sync_guard.latest_success_age_hours = $ageHours + + if ($ageHours -gt $SyncGuardMaxAgeHours) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_stale' + } + } + + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Operations monitoring snapshot passed.' 
+ } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Operations monitoring snapshot failed. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + $report.status = 'fail' + $report.reason_codes = @('ops_monitor_runtime_error') + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +throw ("[ops_monitor_failed] {0}" -f [string]$report.message) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 new file mode 100644 index 0000000..77765f3 --- /dev/null +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -0,0 +1,452 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string]$ReleaseWorkflowFile = 'release-workspace-installer.yml', + + [Parameter()] + [ValidateSet('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle')] + [string]$Mode = 'FullCycle', + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [ValidateRange(1, 10)] + [int]$KeepLatestCanaryN = 1, + + [Parameter()] + [bool]$AutoRemediate = $true, + + [Parameter()] + [ValidateRange(5, 240)] + [int]$WatchTimeoutMinutes = 120, + + [Parameter()] + [switch]$DryRun, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1')
+
+$opsSnapshotScript = Join-Path $PSScriptRoot 'Invoke-OpsMonitoringSnapshot.ps1'
+$opsRemediateScript = Join-Path $PSScriptRoot 'Invoke-OpsAutoRemediation.ps1'
+$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1'
+$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1'
+$canaryHygieneScript = Join-Path $PSScriptRoot 'Invoke-CanarySmokeTagHygiene.ps1'
+
+foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatchWorkflowScript, $watchWorkflowScript, $canaryHygieneScript)) { # fail fast before any gh call if a sibling script is absent
+    if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) {
+        throw "required_script_missing: $requiredScript"
+    }
+}
+
+function Get-ModeConfig { # Returns the channel, prerelease flag, tag sequence range and promotion source for one release mode.
+    param([Parameter(Mandatory = $true)][string]$ModeName)
+
+    switch ($ModeName) {
+        'CanaryCycle' {
+            return [ordered]@{
+                channel = 'canary'
+                prerelease = $true
+                range_min = 1
+                range_max = 49
+                source_channel_for_promotion = '' # empty: canaries are cut directly, not promoted from another channel
+                enforce_prerelease_source = $false
+            }
+        }
+        'PromotePrerelease' {
+            return [ordered]@{
+                channel = 'prerelease'
+                prerelease = $true
+                range_min = 50
+                range_max = 79
+                source_channel_for_promotion = 'canary'
+                enforce_prerelease_source = $true
+            }
+        }
+        'PromoteStable' {
+            return [ordered]@{
+                channel = 'stable'
+                prerelease = $false
+                range_min = 80
+                range_max = 99
+                source_channel_for_promotion = 'prerelease'
+                enforce_prerelease_source = $true
+            }
+        }
+        default { # any other mode name (including 'Validate' and 'FullCycle') has no single-release config
+            throw "unsupported_mode_config: $ModeName"
+        }
+    }
+}
+
+function Parse-ReleaseTag { # Splits a 'v0.<8-digit date>.<sequence>' tag into tag_name/date/sequence; returns $null for any other tag shape.
+    param([Parameter(Mandatory = $true)][string]$TagName)
+
+    $match = [regex]::Match($TagName, '^v0\.(?<date>\d{8})\.(?<sequence>\d+)$') # named groups are required: Groups['date'] / Groups['sequence'] are read below
+    if (-not $match.Success) {
+        return $null
+    }
+
+    $sequence = 0
+    if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { # digit runs too large for Int32 are treated as non-release tags
+        return $null
+    }
+
+    return [ordered]@{
+        tag_name = $TagName
+        date = [string]$match.Groups['date'].Value
+        sequence = $sequence
+    }
+}
+
+function
Get-ReleaseRecordsForDate { # Keeps only releases whose tag parses via Parse-ReleaseTag and whose date equals $DateKey; result is sorted by sequence, highest first.
+    param(
+        [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$ReleaseList, # AllowEmptyCollection: a repository with no releases must yield an empty result, not a binding error
+        [Parameter(Mandatory = $true)][string]$DateKey
+    )
+
+    $records = @()
+    foreach ($release in $ReleaseList) {
+        $parsed = Parse-ReleaseTag -TagName ([string]$release.tagName)
+        if ($null -eq $parsed) {
+            continue
+        }
+        if ([string]$parsed.date -ne $DateKey) {
+            continue
+        }
+
+        $records += [ordered]@{
+            tag_name = [string]$parsed.tag_name
+            date = [string]$parsed.date
+            sequence = [int]$parsed.sequence
+            is_prerelease = [bool]$release.isPrerelease
+            published_at_utc = [string]$release.publishedAt
+        }
+    }
+
+    return @($records | Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true })
+}
+
+function Get-LatestRecordInRange { # Returns, wrapped in an array, the record with the highest sequence inside [RangeMin, RangeMax]; empty when none matches.
+    param(
+        [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records, # AllowEmptyCollection: lets callers report "no source release" themselves instead of failing parameter binding
+        [Parameter(Mandatory = $true)][int]$RangeMin,
+        [Parameter(Mandatory = $true)][int]$RangeMax
+    )
+
+    return @(
+        $Records |
+            Where-Object { [int]$_.sequence -ge $RangeMin -and [int]$_.sequence -le $RangeMax } |
+            Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true } |
+            Select-Object -First 1
+    )
+}
+
+function Invoke-ReleaseMode { # Plans one release for $ModeName on $DateKey and records the outcome by mutating $ExecutionReport in place.
+    param(
+        [Parameter(Mandatory = $true)][string]$ModeName,
+        [Parameter(Mandatory = $true)][string]$DateKey,
+        [Parameter(Mandatory = $true)][string]$ScratchRoot,
+        [Parameter(Mandatory = $true)][System.Collections.IDictionary]$ExecutionReport # IDictionary, not [hashtable]: callers pass [ordered]@{} (OrderedDictionary), which cannot be converted to Hashtable
+    )
+
+    $modeConfig = Get-ModeConfig -ModeName $ModeName
+    $releaseList = @(Invoke-GhJson -Arguments @(
+        'release', 'list',
+        '-R', $Repository,
+        '--limit', '200',
+        '--exclude-drafts',
+        '--json', 'tagName,isPrerelease,publishedAt'
+    ))
+
+    $records = @(Get-ReleaseRecordsForDate -ReleaseList $releaseList -DateKey $DateKey)
+    $targetRangeRecords = @( # same-day releases already inside this mode's sequence range, highest sequence first
+        $records |
+            Where-Object { [int]$_.sequence -ge [int]$modeConfig.range_min -and [int]$_.sequence -le [int]$modeConfig.range_max } |
+            Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true }
+    )
+
+    if (-not
[string]::IsNullOrWhiteSpace([string]$modeConfig.source_channel_for_promotion)) { # promotion modes only: validate the source release before allocating a target tag
+        $sourceRange = switch ([string]$modeConfig.source_channel_for_promotion) {
+            'canary' { [ordered]@{ min = 1; max = 49 } }
+            'prerelease' { [ordered]@{ min = 50; max = 79 } }
+            default { throw "unsupported_source_channel: $([string]$modeConfig.source_channel_for_promotion)" }
+        }
+        $sourceRecord = @(Get-LatestRecordInRange -Records $records -RangeMin $sourceRange.min -RangeMax $sourceRange.max)
+        if (@($sourceRecord).Count -ne 1) {
+            throw "promotion_source_missing: channel=$([string]$modeConfig.source_channel_for_promotion) date=$DateKey"
+        }
+
+        $sourceTag = [string]$sourceRecord[0].tag_name
+        $sourceRelease = Invoke-GhJson -Arguments @(
+            'release', 'view',
+            $sourceTag,
+            '-R', $Repository,
+            '--json', 'tagName,isPrerelease,targetCommitish,publishedAt,assets,url'
+        )
+
+        if ($modeConfig.enforce_prerelease_source -and -not [bool]$sourceRelease.isPrerelease) {
+            throw "promotion_source_not_prerelease: tag=$sourceTag channel=$([string]$modeConfig.source_channel_for_promotion)"
+        }
+
+        $requiredAssets = @( # every one of these must be attached to the source release
+            'lvie-cdev-workspace-installer.exe',
+            'lvie-cdev-workspace-installer.exe.sha256',
+            'reproducibility-report.json',
+            'workspace-installer.spdx.json',
+            'workspace-installer.slsa.json',
+            'release-manifest.json'
+        )
+        $assetNames = @($sourceRelease.assets | ForEach-Object { [string]$_.name })
+        foreach ($requiredAsset in $requiredAssets) {
+            if ($assetNames -notcontains $requiredAsset) {
+                throw "promotion_source_asset_missing: tag=$sourceTag asset=$requiredAsset"
+            }
+        }
+
+        $headSha = (Invoke-GhText -Arguments @('api', "repos/$Repository/branches/$Branch", '--jq', '.commit.sha')).Trim().ToLowerInvariant()
+        $sourceCommit = ([string]$sourceRelease.targetCommitish).Trim().ToLowerInvariant() # NOTE(review): requires targetCommitish to be a 40-hex SHA, not a branch name - confirm how the release workflow sets it
+        if ($headSha -notmatch '^[0-9a-f]{40}$') {
+            throw "branch_head_unresolved: repository=$Repository branch=$Branch"
+        }
+        if ($sourceCommit -notmatch '^[0-9a-f]{40}$') {
+            throw "promotion_source_commit_invalid: tag=$sourceTag targetCommitish=$sourceCommit"
+        }
+        if ($headSha -ne $sourceCommit) {
+            throw "promotion_source_not_at_head: tag=$sourceTag source_sha=$sourceCommit head_sha=$headSha"
+        }
+
+        $ExecutionReport.source_release = [ordered]@{
+            channel = [string]$modeConfig.source_channel_for_promotion
+            tag = $sourceTag
+            source_sha = $sourceCommit
+            head_sha = $headSha
+            url = [string]$sourceRelease.url
+        }
+    }
+
+    $nextSequence = if (@($targetRangeRecords).Count -eq 0) { # first release of the day in this range starts at range_min
+        [int]$modeConfig.range_min
+    } else {
+        ([int]$targetRangeRecords[0].sequence) + 1
+    }
+
+    if ($nextSequence -gt [int]$modeConfig.range_max) {
+        throw "release_tag_range_exhausted: mode=$ModeName date=$DateKey next_sequence=$nextSequence range_max=$([int]$modeConfig.range_max)"
+    }
+
+    $targetTag = "v0.$DateKey.$nextSequence"
+    $ExecutionReport.target_release = [ordered]@{
+        mode = $ModeName
+        channel = [string]$modeConfig.channel
+        prerelease = [bool]$modeConfig.prerelease
+        tag = $targetTag
+        range_min = [int]$modeConfig.range_min
+        range_max = [int]$modeConfig.range_max
+    }
+
+    if ($DryRun) { # dry run: report the planned tag but never dispatch the release workflow
+        $ExecutionReport.dispatch = [ordered]@{
+            status = 'skipped_dry_run'
+            workflow = $ReleaseWorkflowFile
+            branch = $Branch
+            run_id = ''
+            url = ''
+        }
+        return
+    }
+
+    $dispatchReportPath = Join-Path $ScratchRoot "$ModeName-dispatch.json"
+    & pwsh -NoProfile -File $dispatchWorkflowScript `
+        -Repository $Repository `
+        -WorkflowFile $ReleaseWorkflowFile `
+        -Branch $Branch `
+        -Input @(
+            "release_tag=$targetTag",
+            'allow_existing_tag=false',
+            "prerelease=$(([string][bool]$modeConfig.prerelease).ToLowerInvariant())", # cast to string first: member access binds tighter than a cast and Boolean has no ToLowerInvariant()
+            "release_channel=$([string]$modeConfig.channel)"
+        ) `
+        -OutputPath $dispatchReportPath
+    if ($LASTEXITCODE -ne 0) {
+        throw "release_dispatch_failed: mode=$ModeName exit_code=$LASTEXITCODE"
+    }
+    $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop
+
+    $watchReportPath = Join-Path $ScratchRoot
"$ModeName-watch.json" + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $Repository ` + -RunId ([string]$dispatchReport.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -OutputPath $watchReportPath + if ($LASTEXITCODE -ne 0) { + throw "release_watch_failed: mode=$ModeName run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" + } + $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + + $ExecutionReport.dispatch = [ordered]@{ + status = 'success' + workflow = $ReleaseWorkflowFile + branch = $Branch + run_id = [string]$dispatchReport.run_id + url = [string]$watchReport.url + conclusion = [string]$watchReport.conclusion + } + + if ($ModeName -eq 'CanaryCycle') { + $hygienePath = Join-Path $ScratchRoot 'canary-hygiene.json' + & pwsh -NoProfile -File $canaryHygieneScript ` + -Repository $Repository ` + -DateUtc $DateKey ` + -KeepLatestN $KeepLatestCanaryN ` + -Delete ` + -OutputPath $hygienePath + if ($LASTEXITCODE -ne 0) { + throw "canary_hygiene_failed: date=$DateKey exit_code=$LASTEXITCODE" + } + $ExecutionReport.hygiene = Get-Content -LiteralPath $hygienePath -Raw | ConvertFrom-Json -ErrorAction Stop + } +} + +$scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("release-control-plane-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + repository = $Repository + branch = $Branch + mode = $Mode + dry_run = [bool]$DryRun + auto_remediate = [bool]$AutoRemediate + sync_guard_max_age_hours = $SyncGuardMaxAgeHours + keep_latest_canary_n = $KeepLatestCanaryN + status = 'fail' + reason_code = '' + message = '' + pre_health = $null + remediation = $null + post_health = $null + executions = @() +} + +try { + $preHealthPath = Join-Path $scratchRoot 'pre-health.json' + $healthy = $false + try { + & pwsh -NoProfile -File $opsSnapshotScript ` + -SurfaceRepository 
$Repository ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $preHealthPath + if ($LASTEXITCODE -eq 0) { + $healthy = $true + } + } catch { + $healthy = $false + } + + if (Test-Path -LiteralPath $preHealthPath -PathType Leaf) { + $report.pre_health = Get-Content -LiteralPath $preHealthPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if (-not $healthy -and $AutoRemediate) { + $remediationPath = Join-Path $scratchRoot 'remediation.json' + & pwsh -NoProfile -File $opsRemediateScript ` + -SurfaceRepository $Repository ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $remediationPath + if (Test-Path -LiteralPath $remediationPath -PathType Leaf) { + $report.remediation = Get-Content -LiteralPath $remediationPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + } + + $postHealthPath = Join-Path $scratchRoot 'post-health.json' + & pwsh -NoProfile -File $opsSnapshotScript ` + -SurfaceRepository $Repository ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $postHealthPath + if ($LASTEXITCODE -ne 0) { + throw 'ops_health_gate_failed' + } + $report.post_health = Get-Content -LiteralPath $postHealthPath -Raw | ConvertFrom-Json -ErrorAction Stop + + if ([string]$report.post_health.status -ne 'pass') { + throw "ops_unhealthy: reason_codes=$([string]::Join(',', @($report.post_health.reason_codes)))" + } + + if ($Mode -eq 'Validate') { + $report.status = 'pass' + $report.reason_code = if ($DryRun) { 'validate_dry_run' } else { 'validated' } + $report.message = 'Release control plane validation completed without dispatch.' 
+ } else { + $dateKey = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') + $executionList = [System.Collections.Generic.List[object]]::new() + + if ($Mode -eq 'FullCycle') { + $canaryExec = [ordered]@{} + Invoke-ReleaseMode -ModeName 'CanaryCycle' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $canaryExec + [void]$executionList.Add($canaryExec) + + $prereleaseExec = [ordered]@{} + Invoke-ReleaseMode -ModeName 'PromotePrerelease' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $prereleaseExec + [void]$executionList.Add($prereleaseExec) + + $stableExec = [ordered]@{ + target_release = [ordered]@{ + mode = 'PromoteStable' + status = 'skipped' + reason_code = 'stable_window_closed' + } + } + $dayOfWeekUtc = (Get-Date).ToUniversalTime().DayOfWeek.ToString() + if ($dayOfWeekUtc -eq 'Monday') { + $stableExec = [ordered]@{} + Invoke-ReleaseMode -ModeName 'PromoteStable' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $stableExec + } + [void]$executionList.Add($stableExec) + } else { + $singleExec = [ordered]@{} + Invoke-ReleaseMode -ModeName $Mode -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $singleExec + [void]$executionList.Add($singleExec) + } + + $report.executions = @($executionList) + $report.status = 'pass' + $report.reason_code = if ($DryRun) { 'dry_run' } else { 'completed' } + $report.message = 'Release control plane completed.' 
+ } +} +catch { + $report.status = 'fail' + $report.reason_code = 'control_plane_failed' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Write-OpsSloReport.ps1 b/scripts/Write-OpsSloReport.ps1 new file mode 100644 index 0000000..4654f65 --- /dev/null +++ b/scripts/Write-OpsSloReport.ps1 @@ -0,0 +1,192 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SurfaceRepository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SyncGuardRepository = 'LabVIEW-Community-CI-CD/labview-cdev-cli', + + [Parameter()] + [ValidateRange(1, 90)] + [int]$LookbackDays = 7, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Get-WorkflowSloSummary { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter(Mandatory = $true)][string]$WorkflowName, + [Parameter(Mandatory = $true)][DateTime]$CutoffUtc + ) + + $runListOutput = & gh run list ` + -R $Repository ` + --workflow $WorkflowName ` + --limit 100 ` + --json databaseId,status,conclusion,createdAt,url,event 2>&1 + $runListExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + + $runs = @() + if ($runListExitCode -eq 0) { + $runListText = [string]::Join([Environment]::NewLine, @($runListOutput)) + if (-not [string]::IsNullOrWhiteSpace($runListText)) { + $runs = @($runListText | ConvertFrom-Json -ErrorAction Stop) + } + } else { + $runListErrorText = [string]::Join([Environment]::NewLine, @($runListOutput)) + if ($runListErrorText -match 'could not find any workflows named') { + $runs = @() + } else { + throw ("workflow_slo_query_failed: repository={0} workflow={1} error={2}" -f $Repository, $WorkflowName, $runListErrorText) + } + } + + $windowRuns = @( + $runs | + Where-Object { + $created = Parse-RunTimestamp -Run $_ + $created.UtcDateTime -ge $CutoffUtc + } + ) + $completedRuns = @($windowRuns | Where-Object { [string]$_.status -eq 'completed' }) + $successRuns = @($completedRuns | Where-Object { [string]$_.conclusion -eq 'success' }) + $failureRuns = @($completedRuns | Where-Object { [string]$_.conclusion -ne 'success' }) + + $latestRun = @($windowRuns | Sort-Object { Parse-RunTimestamp -Run $_ } -Descending | Select-Object -First 1) + $latest = $null + if (@($latestRun).Count -eq 1) { + $latest = [ordered]@{ + run_id = [string]$latestRun[0].databaseId + status = [string]$latestRun[0].status + conclusion = [string]$latestRun[0].conclusion + event = [string]$latestRun[0].event + created_at_utc = (Parse-RunTimestamp -Run $latestRun[0]).ToString('o') + url = [string]$latestRun[0].url + } + } + + $successRate = if 
(@($completedRuns).Count -eq 0) { 0.0 } else { [Math]::Round((@($successRuns).Count / @($completedRuns).Count) * 100, 2) } + + return [ordered]@{ + workflow = $WorkflowName + total_runs = @($windowRuns).Count + completed_runs = @($completedRuns).Count + success_runs = @($successRuns).Count + failure_runs = @($failureRuns).Count + success_rate_pct = $successRate + latest_run = $latest + } +} + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + surface_repository = $SurfaceRepository + sync_guard_repository = $SyncGuardRepository + lookback_days = $LookbackDays + window_start_utc = '' + workflows = @() + sync_guard = [ordered]@{} + canary_release_activity = [ordered]@{} +} + +try { + $cutoffUtc = (Get-Date).ToUniversalTime().AddDays(-1 * $LookbackDays) + $report.window_start_utc = $cutoffUtc.ToString('o') + + $report.workflows = @( + Get-WorkflowSloSummary -Repository $SurfaceRepository -WorkflowName 'ops-monitoring' -CutoffUtc $cutoffUtc + Get-WorkflowSloSummary -Repository $SurfaceRepository -WorkflowName 'ops-autoremediate' -CutoffUtc $cutoffUtc + Get-WorkflowSloSummary -Repository $SurfaceRepository -WorkflowName 'release-control-plane' -CutoffUtc $cutoffUtc + ) + + $syncGuardRuns = @(Invoke-GhJson -Arguments @( + 'run', 'list', + '-R', $SyncGuardRepository, + '--workflow', 'fork-upstream-sync-guard', + '--branch', 'main', + '--limit', '100', + '--json', 'databaseId,status,conclusion,createdAt,url,event' + )) + $syncGuardWindow = @( + $syncGuardRuns | + Where-Object { + (Parse-RunTimestamp -Run $_).UtcDateTime -ge $cutoffUtc + } | + Sort-Object { Parse-RunTimestamp -Run $_ } -Descending + ) + $syncGuardLatestSuccess = @( + $syncGuardWindow | + Where-Object { [string]$_.status -eq 'completed' -and [string]$_.conclusion -eq 'success' } | + Select-Object -First 1 + ) + $syncGuardAgeHours = $null + if (@($syncGuardLatestSuccess).Count -eq 1) { + $syncGuardAgeHours = [Math]::Round((((Get-Date).ToUniversalTime() - (Parse-RunTimestamp 
-Run $syncGuardLatestSuccess[0]).UtcDateTime).TotalHours), 2) + } + $report.sync_guard = [ordered]@{ + total_runs = @($syncGuardWindow).Count + latest_success_age_hours = $syncGuardAgeHours + latest_success_run = if (@($syncGuardLatestSuccess).Count -eq 1) { + [ordered]@{ + run_id = [string]$syncGuardLatestSuccess[0].databaseId + created_at_utc = (Parse-RunTimestamp -Run $syncGuardLatestSuccess[0]).ToString('o') + url = [string]$syncGuardLatestSuccess[0].url + } + } else { + $null + } + } + + $releases = @(Invoke-GhJson -Arguments @( + 'release', 'list', + '-R', $SurfaceRepository, + '--limit', '200', + '--exclude-drafts', + '--json', 'tagName,isPrerelease,publishedAt' + )) + $canaryReleases = @( + $releases | + Where-Object { + ([string]$_.tagName -match '^v0\.\d{8}\.(?:[1-9]|[1-4][0-9])$') -and [bool]$_.isPrerelease + } | + Where-Object { + $published = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$_.publishedAt, [ref]$published) + $published.UtcDateTime -ge $cutoffUtc + } | + Sort-Object { + $published = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$_.publishedAt, [ref]$published) + $published + } -Descending + ) + $report.canary_release_activity = [ordered]@{ + count = @($canaryReleases).Count + latest = if (@($canaryReleases).Count -gt 0) { + [ordered]@{ + tag = [string]$canaryReleases[0].tagName + published_at_utc = [string]$canaryReleases[0].publishedAt + } + } else { + $null + } + } +} +catch { + $report.error = [string]$_.Exception.Message + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + throw +} + +Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null diff --git a/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..95f5a17 --- /dev/null +++ b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 @@ -0,0 +1,47 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + 
+$ErrorActionPreference = 'Stop'
+
+Describe 'Canary smoke tag hygiene workflow contract' { # text-level contract: asserts on the raw content of the workflow YAML and the hygiene script
+    BeforeAll {
+        $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path
+        $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/canary-smoke-tag-hygiene.yml'
+        $script:scriptPath = Join-Path $script:repoRoot 'scripts/Invoke-CanarySmokeTagHygiene.ps1'
+
+        if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) {
+            throw "Canary smoke tag hygiene workflow missing: $script:workflowPath"
+        }
+        if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) {
+            throw "Canary smoke tag hygiene script missing: $script:scriptPath"
+        }
+
+        $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw
+        $script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw
+    }
+
+    It 'is scheduled and dispatchable with apply-controls inputs' {
+        $script:workflowContent | Should -Match 'schedule:'
+        $script:workflowContent | Should -Match 'workflow_dispatch:'
+        $script:workflowContent | Should -Match 'target_date_utc'
+        $script:workflowContent | Should -Match 'keep_latest_n'
+        $script:workflowContent | Should -Match 'apply_changes'
+        $script:workflowContent | Should -Match 'type:\s*boolean'
+    }
+
+    It 'runs hygiene script and uploads deterministic report artifact' {
+        $script:workflowContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1'
+        $script:workflowContent | Should -Match 'canary-smoke-tag-hygiene-report\.json'
+        $script:workflowContent | Should -Match 'upload-artifact'
+    }
+
+    It 'enforces keep-latest canary tag cleanup behavior' {
+        $script:scriptContent | Should -Match 'release''\s*,\s*''list'''
+        $script:scriptContent | Should -Match 'release''\s*,\s*''delete'''
+        $script:scriptContent | Should -Match '--cleanup-tag'
+        $script:scriptContent | Should -Match 'KeepLatestN'
+        $script:scriptContent | Should -Match '\(\?<date>\\d\{8\}\)' # NOTE(review): group name restored on the assumption the hygiene script uses (?<date>\d{8}) like Parse-ReleaseTag - confirm
+        $script:scriptContent | Should -Match '\(\?<sequence>\\d\+\)' # NOTE(review): same assumption for (?<sequence>\d+) - confirm against the script
+
$script:scriptContent | Should -Match 'delete_count_exceeds_guard' + } +} diff --git a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..3a6ca8e --- /dev/null +++ b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 @@ -0,0 +1,46 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops auto-remediation workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/ops-autoremediate.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-OpsAutoRemediation.ps1' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Ops auto-remediation workflow missing: $script:workflowPath" + } + if (-not (Test-Path -LiteralPath $script:runtimePath -PathType Leaf)) { + throw "Ops auto-remediation runtime missing: $script:runtimePath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'sync_guard_max_age_hours' + $script:workflowContent | Should -Match 'actions:\s*write' + } + + It 'executes deterministic remediation and reports incidents' { + $script:workflowContent | Should -Match 'Invoke-OpsAutoRemediation\.ps1' + $script:workflowContent | Should -Match 'ops-autoremediate-report\.json' + $script:workflowContent | Should -Match 'Ops Auto-Remediation Alert' + $script:workflowContent | Should -Match 'gh issue' + } + + It 'targets sync-guard drift and classifies manual runner intervention' { + $script:runtimeContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + 
$script:runtimeContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' + $script:runtimeContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:runtimeContent | Should -Match 'manual_intervention_required' + $script:runtimeContent | Should -Match 'remediated' + $script:runtimeContent | Should -Match 'no_automatable_action' + $script:runtimeContent | Should -Match 'remediation_failed' + } +} diff --git a/tests/OpsMonitoringWorkflowContract.Tests.ps1 b/tests/OpsMonitoringWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..3f95f28 --- /dev/null +++ b/tests/OpsMonitoringWorkflowContract.Tests.ps1 @@ -0,0 +1,45 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops monitoring workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/ops-monitoring.yml' + $script:scriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsMonitoringSnapshot.ps1' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Ops monitoring workflow missing: $script:workflowPath" + } + if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) { + throw "Ops monitoring script missing: $script:scriptPath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw + } + + It 'is scheduled and dispatchable' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'cron:' + } + + It 'runs snapshot script and uploads deterministic report artifact' { + $script:workflowContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:workflowContent | Should -Match 'ops-monitoring-report\.json' + $script:workflowContent | Should -Match 'upload-artifact' + 
$script:workflowContent | Should -Match 'Ops Monitoring Alert' + } + + It 'checks runner and sync-guard health with deterministic reason codes' { + $script:scriptContent | Should -Match 'repos/\$SurfaceRepository/actions/runners\?per_page=100' + $script:scriptContent | Should -Match 'run''\s*,\s*''list''' + $script:scriptContent | Should -Match 'runner_unavailable' + $script:scriptContent | Should -Match 'sync_guard_failed' + $script:scriptContent | Should -Match 'sync_guard_stale' + $script:scriptContent | Should -Match 'sync_guard_missing' + $script:scriptContent | Should -Match 'sync_guard_incomplete' + } +} diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..bfc65cc --- /dev/null +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -0,0 +1,55 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release control plane workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-control-plane.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseControlPlane.ps1' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Release control plane workflow missing: $script:workflowPath" + } + if (-not (Test-Path -LiteralPath $script:runtimePath -PathType Leaf)) { + throw "Release control plane runtime missing: $script:runtimePath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with control inputs' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'mode:' + 
$script:workflowContent | Should -Match 'FullCycle' + $script:workflowContent | Should -Match 'auto_remediate' + $script:workflowContent | Should -Match 'keep_latest_canary_n' + $script:workflowContent | Should -Match 'dry_run' + } + + It 'runs autonomous control-plane runtime and uploads report' { + $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' + $script:workflowContent | Should -Match 'release-control-plane-report\.json' + $script:workflowContent | Should -Match 'Release Control Plane Alert' + $script:workflowContent | Should -Match 'actions:\s*write' + $script:workflowContent | Should -Match 'contents:\s*write' + } + + It 'implements mode sequencing, promotion guards, and deterministic tag ranges' { + $script:runtimeContent | Should -Match "ValidateSet\('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle'\)" + $script:runtimeContent | Should -Match 'range_min = 1' + $script:runtimeContent | Should -Match 'range_max = 49' + $script:runtimeContent | Should -Match 'range_min = 50' + $script:runtimeContent | Should -Match 'range_max = 79' + $script:runtimeContent | Should -Match 'range_min = 80' + $script:runtimeContent | Should -Match 'range_max = 99' + $script:runtimeContent | Should -Match 'promotion_source_missing' + $script:runtimeContent | Should -Match 'promotion_source_asset_missing' + $script:runtimeContent | Should -Match 'promotion_source_not_at_head' + $script:runtimeContent | Should -Match 'release_tag_range_exhausted' + $script:runtimeContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' + } +} diff --git a/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 b/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 index e783d7e..947e100 100644 --- a/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 +++ b/tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1 @@ -29,13 +29,18 @@ Describe 'Release with Windows gate workflow contract' { It 'contains repo guard, hard gate ordering, and 
reusable workflow chaining' { $script:workflowContent | Should -Match "expectedRepo = 'LabVIEW-Community-CI-CD/labview-cdev-surface'" + $script:workflowContent | Should -Match 'ops_health_gate:' + $script:workflowContent | Should -Match 'needs:\s*\[repo_guard,\s*ops_health_gate\]' + $script:workflowContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:workflowContent | Should -Match 'release-ops-health-gate-\$\{\{\s*github\.run_id\s*\}\}' + $script:workflowContent | Should -Match '\[ops_unhealthy\]' $script:workflowContent | Should -Match 'windows_gate:' $script:workflowContent | Should -Match 'linux_gate:' - $script:workflowContent | Should -Match 'needs:\s*\[repo_guard\]' + $script:workflowContent | Should -Match 'needs:\s*\[repo_guard,\s*ops_health_gate\]' $script:workflowContent | Should -Match 'uses:\s*\./\.github/workflows/_windows-labview-image-gate-core\.yml' $script:workflowContent | Should -Match 'uses:\s*\./\.github/workflows/_linux-labview-image-gate-core\.yml' $script:workflowContent | Should -Match 'gate_policy:' - $script:workflowContent | Should -Match 'needs:\s*\[repo_guard,\s*windows_gate,\s*linux_gate\]' + $script:workflowContent | Should -Match 'needs:\s*\[repo_guard,\s*ops_health_gate,\s*windows_gate,\s*linux_gate\]' $script:workflowContent | Should -Match 'if:\s*\$\{\{\s*always\(\)\s*\}\}' $script:workflowContent | Should -Match 'release_publish:' $script:workflowContent | Should -Match 'needs:\s*\[gate_policy\]' @@ -44,6 +49,7 @@ Describe 'Release with Windows gate workflow contract' { It 'enforces hard block and controlled override metadata requirements' { $script:workflowContent | Should -Match 'Repository guard did not succeed' + $script:workflowContent | Should -Match 'Ops health gate did not succeed' $script:workflowContent | Should -Match 'One or more gates failed and override is not enabled' $script:workflowContent | Should -Match 'allow_gate_override=true requires non-empty override_reason' $script:workflowContent | 
Should -Match 'allow_gate_override=true requires override_incident_url' diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 new file mode 100644 index 0000000..5d4b76b --- /dev/null +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -0,0 +1,42 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Scope A ops runbook contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:runbookPath = Join-Path $script:repoRoot 'docs/runbooks/release-ops-incident-response.md' + $script:readmePath = Join-Path $script:repoRoot 'README.md' + $script:agentsPath = Join-Path $script:repoRoot 'AGENTS.md' + + foreach ($path in @($script:runbookPath, $script:readmePath, $script:agentsPath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Required ops hardening contract file missing: $path" + } + } + + $script:runbookContent = Get-Content -LiteralPath $script:runbookPath -Raw + $script:readmeContent = Get-Content -LiteralPath $script:readmePath -Raw + $script:agentsContent = Get-Content -LiteralPath $script:agentsPath -Raw + } + + It 'documents deterministic incident commands for runner, sync-guard, and canary hygiene' { + $script:runbookContent | Should -Match 'Get-Service' + $script:runbookContent | Should -Match 'fork-upstream-sync-guard' + $script:runbookContent | Should -Match 'Invoke-ControlledForkForceAlign\.ps1' + $script:runbookContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' + $script:runbookContent | Should -Match '20260226' + } + + It 'keeps README and AGENTS aligned to Scope A workflows' { + $script:readmeContent | Should -Match 'ops-monitoring\.yml' + $script:readmeContent | Should -Match 'canary-smoke-tag-hygiene\.yml' + $script:readmeContent | Should -Match 'release-ops-incident-response\.md' + + $script:agentsContent | Should -Match 'Ops Monitoring Policy' + $script:agentsContent | Should 
-Match 'runner_unavailable' + $script:agentsContent | Should -Match 'sync_guard_failed' + $script:agentsContent | Should -Match 'canary-smoke-tag-hygiene\.yml' + } +} diff --git a/tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1 b/tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..8a14260 --- /dev/null +++ b/tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1 @@ -0,0 +1,44 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Weekly ops SLO workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/weekly-ops-slo-report.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Write-OpsSloReport.ps1' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Weekly ops SLO workflow missing: $script:workflowPath" + } + if (-not (Test-Path -LiteralPath $script:runtimePath -PathType Leaf)) { + throw "Weekly ops SLO runtime missing: $script:runtimePath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled weekly and dispatchable with lookback input' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'lookback_days' + $script:workflowContent | Should -Match 'weekly-ops-slo-report' + } + + It 'generates and uploads machine-readable SLO report artifact' { + $script:workflowContent | Should -Match 'Write-OpsSloReport\.ps1' + $script:workflowContent | Should -Match 'weekly-ops-slo-report\.json' + $script:workflowContent | Should -Match 'upload-artifact' + } + + It 'calculates workflow and sync-guard SLO summaries' { + $script:runtimeContent | Should -Match 'Get-WorkflowSloSummary' + 
$script:runtimeContent | Should -Match 'ops-monitoring' + $script:runtimeContent | Should -Match 'ops-autoremediate' + $script:runtimeContent | Should -Match 'release-control-plane' + $script:runtimeContent | Should -Match 'fork-upstream-sync-guard' + $script:runtimeContent | Should -Match 'success_rate_pct' + } +} diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 7dac9ac..ce55854 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -46,6 +46,12 @@ Describe 'Workspace installer release workflow contract' { } It 'defines package and publish jobs with release asset upload' { + $script:coreWorkflowContent | Should -Match 'name:\s*Release Ops Health Preflight' + $script:coreWorkflowContent | Should -Match 'Enforce ops health preflight' + $script:coreWorkflowContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:coreWorkflowContent | Should -Match 'reason_code=ops_unhealthy' + $script:coreWorkflowContent | Should -Match '\[ops_unhealthy\]' + $script:coreWorkflowContent | Should -Match 'release-ops-health-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Release Runner Availability Preflight' $script:coreWorkflowContent | Should -Match 'Validate eligible self-hosted release runner availability' $script:coreWorkflowContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' @@ -53,7 +59,7 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match '\[runner_unavailable\]' $script:coreWorkflowContent | Should -Match 'release-runner-availability-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Package Workspace Installer' - $script:coreWorkflowContent | Should -Match 'needs:\s*\[runner_preflight\]' + $script:coreWorkflowContent | Should -Match 
'needs:\s*\[ops_health_preflight,\s*runner_preflight\]' $script:coreWorkflowContent | Should -Match 'name:\s*Publish GitHub Release Asset' $script:coreWorkflowContent | Should -Match 'Release preflight - verify icon-editor upstream pin freshness' $script:coreWorkflowContent | Should -Match 'repos/LabVIEW-Community-CI-CD/labview-icon-editor/branches/develop' @@ -68,6 +74,11 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'Get-AuthenticodeSignature' $script:coreWorkflowContent | Should -Match 'WORKSPACE_INSTALLER_CODESIGN_PFX_B64' $script:coreWorkflowContent | Should -Match 'WORKSPACE_INSTALLER_CODESIGN_PFX_PASSWORD' + $script:coreWorkflowContent | Should -Match 'signatureDualModeStartUtc' + $script:coreWorkflowContent | Should -Match 'signatureCanaryEnforceUtc' + $script:coreWorkflowContent | Should -Match 'signatureGraceEndUtc' + $script:coreWorkflowContent | Should -Match '\[signature_required\]' + $script:coreWorkflowContent | Should -Match '\[signature_warning\]' $script:coreWorkflowContent | Should -Match 'release-manifest\.json' $script:coreWorkflowContent | Should -Match 'release and parity artifact roots are identical' $script:coreWorkflowContent | Should -Match 'must not point to parity path' From 8d657f533133752e8930ce47abd667a349e48c2d Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 15:59:20 -0800 Subject: [PATCH 05/60] feat(ops): add local docker release-control-plane harness --- .github/workflows/ci.yml | 1 + .vscode/tasks.json | 27 +++ AGENTS.md | 1 + README.md | 19 ++ scripts/Cancel-StaleWorkflowRuns.ps1 | 10 +- scripts/Dispatch-WorkflowAtRemoteHead.ps1 | 10 +- scripts/Exercise-ReleaseControlPlaneLocal.ps1 | 190 ++++++++++++++++++ scripts/Invoke-CanarySmokeTagHygiene.ps1 | 8 +- scripts/Invoke-OpsMonitoringSnapshot.ps1 | 98 +++++---- scripts/Invoke-ReleaseControlPlane.ps1 | 8 +- .../Invoke-ReleaseControlPlaneLocalDocker.ps1 | 89 ++++++++ scripts/Watch-WorkflowRun.ps1 | 9 +- 
scripts/Write-OpsSloReport.ps1 | 39 +--- scripts/lib/WorkflowOps.Common.ps1 | 131 ++++++++++++ ...ySmokeTagHygieneWorkflowContract.Tests.ps1 | 2 +- tests/OpsMonitoringWorkflowContract.Tests.ps1 | 3 +- ...lPlaneLocalDockerHarnessContract.Tests.ps1 | 44 ++++ tests/VsCodeTasksContract.Tests.ps1 | 2 + tools/ops-runtime/Dockerfile | 2 + tools/ops-runtime/README.md | 23 +++ 20 files changed, 602 insertions(+), 114 deletions(-) create mode 100644 scripts/Exercise-ReleaseControlPlaneLocal.ps1 create mode 100644 scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 create mode 100644 tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 create mode 100644 tools/ops-runtime/README.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0dbdbb3..9ca40b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,6 +63,7 @@ jobs: './tests/WatchWorkflowRunContract.Tests.ps1', './tests/PortableOpsRuntimeContract.Tests.ps1', './tests/VsCodeTasksContract.Tests.ps1', + './tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1', './tests/UploadArtifactRetryCompositeContract.Tests.ps1', './tests/InstallerHarnessWorkflowContract.Tests.ps1', './tests/OpsMonitoringWorkflowContract.Tests.ps1', diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 7ed178a..9eb50f7 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -23,6 +23,12 @@ "id": "opsRunId", "type": "promptString", "description": "Run ID (optional for watch)" + }, + { + "id": "opsControlPlaneMode", + "type": "promptString", + "description": "Release control plane mode", + "default": "Validate" } ], "tasks": [ @@ -116,6 +122,27 @@ "artifacts/dispatch/queue-snapshot.json" ], "problemMatcher": [] + }, + { + "label": "ops: release control plane local (docker)", + "type": "shell", + "command": "pwsh", + "args": [ + "-NoProfile", + "-File", + "scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1", + "-Repository", + "${input:opsRepo}", + "-Branch", + "${input:opsBranch}", + "-Mode", 
+ "${input:opsControlPlaneMode}", + "-DryRun", + "-RunContractTests", + "-OutputRoot", + "artifacts/release-control-plane-local" + ], + "problemMatcher": [] } ] } diff --git a/AGENTS.md b/AGENTS.md index 2df70e3..a5c2e76 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -241,6 +241,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `-Mode full` for isolated smoke install validation. - `-Watch` to auto-rerun on contract file changes without manual restarts. - Use `scripts/Invoke-DockerDesktopLinuxIteration.ps1 -DockerContext desktop-linux` for Docker Desktop Linux command-surface checks (`runner-cli --help`, `runner-cli ppl --help`) before full Windows LabVIEW image runs. +- Use `scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1` for local containerized release-control-plane exercise (`Validate` + `DryRun` default). - If Docker Desktop Linux context is unavailable, confirm `Microsoft-Hyper-V-All`, `VirtualMachinePlatform`, and `Microsoft-Windows-Subsystem-Linux` are enabled, then reboot before retrying. - Use `scripts/Test-RunnerCliBundleDeterminism.ps1` and `scripts/Test-WorkspaceInstallerDeterminism.ps1` locally before proposing release-tag publication. - Keep local iteration artifacts under `artifacts\release\iteration`. diff --git a/README.md b/README.md index eb43bbf..98893de 100644 --- a/README.md +++ b/README.md @@ -326,6 +326,25 @@ Control-plane behavior: `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. 
+## Local Docker package for control-plane exercise + +Run the local Docker harness (safe default, validate + dry-run): + +```powershell +pwsh -NoProfile -File .\scripts\Invoke-ReleaseControlPlaneLocalDocker.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -Branch main ` + -Mode Validate ` + -DryRun ` + -RunContractTests +``` + +This executes `scripts/Exercise-ReleaseControlPlaneLocal.ps1` in the portable ops container image and writes artifacts under: +- `artifacts\release-control-plane-local` + +For offline or container runtime fallback on the host: +- add `-HostFallback` + Runbook for incidents: - `docs/runbooks/release-ops-incident-response.md` diff --git a/scripts/Cancel-StaleWorkflowRuns.ps1 b/scripts/Cancel-StaleWorkflowRuns.ps1 index c9d62dd..3c4974f 100644 --- a/scripts/Cancel-StaleWorkflowRuns.ps1 +++ b/scripts/Cancel-StaleWorkflowRuns.ps1 @@ -25,15 +25,7 @@ $ErrorActionPreference = 'Stop' . (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') -$allRuns = @(Invoke-GhJson -Arguments @( - 'run', 'list', - '-R', $Repository, - '--workflow', $WorkflowFile, - '--branch', $Branch, - '--event', 'workflow_dispatch', - '--limit', '100', - '--json', 'databaseId,status,conclusion,url,createdAt,headSha' -)) +$allRuns = @(Get-GhWorkflowRunsPortable -Repository $Repository -Workflow $WorkflowFile -Branch $Branch -Event 'workflow_dispatch' -Limit 100) $orderedRuns = @($allRuns | Sort-Object { Parse-RunTimestamp -Run $_ } -Descending) $keepIds = @($orderedRuns | Select-Object -First ([Math]::Max($KeepLatestN, 0)) | ForEach-Object { [string]$_.databaseId }) diff --git a/scripts/Dispatch-WorkflowAtRemoteHead.ps1 b/scripts/Dispatch-WorkflowAtRemoteHead.ps1 index a7e7c9b..f186ec3 100644 --- a/scripts/Dispatch-WorkflowAtRemoteHead.ps1 +++ b/scripts/Dispatch-WorkflowAtRemoteHead.ps1 @@ -58,15 +58,7 @@ Invoke-Gh -Arguments $dispatchArgs Start-Sleep -Seconds $DispatchPauseSeconds -$runList = @(Invoke-GhJson -Arguments @( - 'run', 'list', - '-R', 
$Repository, - '--workflow', $WorkflowFile, - '--branch', $Branch, - '--event', 'workflow_dispatch', - '--limit', '30', - '--json', 'databaseId,status,conclusion,url,createdAt,headSha,displayTitle' -)) +$runList = @(Get-GhWorkflowRunsPortable -Repository $Repository -Workflow $WorkflowFile -Branch $Branch -Event 'workflow_dispatch' -Limit 30) $candidates = @( $runList | Where-Object { diff --git a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 new file mode 100644 index 0000000..7e26771 --- /dev/null +++ b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 @@ -0,0 +1,190 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidateSet('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle')] + [string]$Mode = 'Validate', + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [ValidateRange(1, 10)] + [int]$KeepLatestCanaryN = 1, + + [Parameter()] + [switch]$IncludeOpsAutoRemediation, + + [Parameter()] + [switch]$RunContractTests, + + [Parameter()] + [switch]$DryRun, + + [Parameter()] + [switch]$AllowMutatingModes, + + [Parameter()] + [string]$OutputRoot = 'artifacts/release-control-plane-local' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +if (-not [string]::Equals($Mode, 'Validate', [System.StringComparison]::OrdinalIgnoreCase) -and -not $AllowMutatingModes) { + throw "mutating_mode_blocked: mode '$Mode' requires -AllowMutatingModes." 
+} + +$repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path +$resolvedOutputRoot = [System.IO.Path]::GetFullPath((Join-Path $repoRoot $OutputRoot)) +if (-not (Test-Path -LiteralPath $resolvedOutputRoot -PathType Container)) { + New-Item -Path $resolvedOutputRoot -ItemType Directory -Force | Out-Null +} + +$opsSnapshotScript = Join-Path $PSScriptRoot 'Invoke-OpsMonitoringSnapshot.ps1' +$opsRemediateScript = Join-Path $PSScriptRoot 'Invoke-OpsAutoRemediation.ps1' +$controlPlaneScript = Join-Path $PSScriptRoot 'Invoke-ReleaseControlPlane.ps1' +$sloScript = Join-Path $PSScriptRoot 'Write-OpsSloReport.ps1' +foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $controlPlaneScript, $sloScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +$summary = [ordered]@{ + schema_version = '1.0' + generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + repository = $Repository + branch = $Branch + mode = $Mode + dry_run = [bool]$DryRun + allow_mutating_modes = [bool]$AllowMutatingModes + output_root = $resolvedOutputRoot + status = 'fail' + steps = @() +} + +function Add-StepResult { + param( + [Parameter(Mandatory = $true)] + [string]$Name, + [Parameter(Mandatory = $true)] + [string]$Status, + [Parameter()] + [string]$OutputPath = '', + [Parameter()] + [string]$Message = '' + ) + + $step = [ordered]@{ + name = $Name + status = $Status + output_path = $OutputPath + message = $Message + } + $summary.steps += @($step) +} + +try { + $opsSnapshotPath = Join-Path $resolvedOutputRoot 'ops-monitoring-report.json' + & pwsh -NoProfile -File $opsSnapshotScript ` + -SurfaceRepository $Repository ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $opsSnapshotPath + if ($LASTEXITCODE -ne 0) { + throw "ops_snapshot_failed: exit_code=$LASTEXITCODE" + } + Add-StepResult -Name 'ops_monitoring' -Status 'pass' -OutputPath $opsSnapshotPath + + if 
($IncludeOpsAutoRemediation) { + $opsRemediatePath = Join-Path $resolvedOutputRoot 'ops-autoremediate-report.json' + & pwsh -NoProfile -File $opsRemediateScript ` + -SurfaceRepository $Repository ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -OutputPath $opsRemediatePath + if ($LASTEXITCODE -ne 0) { + throw "ops_autoremediation_failed: exit_code=$LASTEXITCODE" + } + Add-StepResult -Name 'ops_autoremediate' -Status 'pass' -OutputPath $opsRemediatePath + } else { + Add-StepResult -Name 'ops_autoremediate' -Status 'skipped' -Message 'IncludeOpsAutoRemediation not set.' + } + + $controlPlanePath = Join-Path $resolvedOutputRoot 'release-control-plane-report.json' + & pwsh -NoProfile -File $controlPlaneScript ` + -Repository $Repository ` + -Branch $Branch ` + -Mode $Mode ` + -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` + -KeepLatestCanaryN $KeepLatestCanaryN ` + -AutoRemediate:$false ` + -DryRun:$DryRun ` + -OutputPath $controlPlanePath + if ($LASTEXITCODE -ne 0) { + throw "release_control_plane_failed: exit_code=$LASTEXITCODE" + } + Add-StepResult -Name 'release_control_plane' -Status 'pass' -OutputPath $controlPlanePath + + $sloPath = Join-Path $resolvedOutputRoot 'weekly-ops-slo-report.json' + & pwsh -NoProfile -File $sloScript ` + -SurfaceRepository $Repository ` + -OutputPath $sloPath + if ($LASTEXITCODE -ne 0) { + throw "ops_slo_report_failed: exit_code=$LASTEXITCODE" + } + Add-StepResult -Name 'weekly_ops_slo' -Status 'pass' -OutputPath $sloPath + + if ($RunContractTests) { + $pesterOutputPath = Join-Path $resolvedOutputRoot 'control-plane-contract-tests.xml' + $pesterPaths = @( + (Join-Path $repoRoot 'tests/OpsMonitoringWorkflowContract.Tests.ps1'), + (Join-Path $repoRoot 'tests/OpsAutoRemediationWorkflowContract.Tests.ps1'), + (Join-Path $repoRoot 'tests/ReleaseControlPlaneWorkflowContract.Tests.ps1'), + (Join-Path $repoRoot 'tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1') + ) + $pesterConfig = New-PesterConfiguration + $pesterConfig.Run.Path = 
$pesterPaths + $pesterConfig.Run.Exit = $false + $pesterConfig.Run.PassThru = $true + $pesterConfig.Output.Verbosity = 'Detailed' + $pesterConfig.TestResult.Enabled = $true + $pesterConfig.TestResult.OutputFormat = 'NUnitXml' + $pesterConfig.TestResult.OutputPath = $pesterOutputPath + $pesterResult = Invoke-Pester -Configuration $pesterConfig + if ($null -eq $pesterResult) { + throw 'contract_tests_failed: pester_result_missing' + } + if ([int]$pesterResult.FailedCount -gt 0 -or [int]$pesterResult.FailedBlocksCount -gt 0) { + throw ("contract_tests_failed: failed_count={0}" -f [int]$pesterResult.FailedCount) + } + Add-StepResult -Name 'contract_tests' -Status 'pass' -OutputPath $pesterOutputPath + } else { + Add-StepResult -Name 'contract_tests' -Status 'skipped' -Message 'RunContractTests not set.' + } + + $summary.status = 'pass' +} +catch { + Add-StepResult -Name 'harness' -Status 'fail' -Message ([string]$_.Exception.Message) + $summary.status = 'fail' +} +finally { + $summaryPath = Join-Path $resolvedOutputRoot 'release-control-plane-local-summary.json' + $summary | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $summaryPath -Encoding utf8 + Write-Host "Summary written: $summaryPath" +} + +if ([string]$summary.status -ne 'pass') { + exit 1 +} + +exit 0 diff --git a/scripts/Invoke-CanarySmokeTagHygiene.ps1 b/scripts/Invoke-CanarySmokeTagHygiene.ps1 index dc9b919..7cd8af6 100644 --- a/scripts/Invoke-CanarySmokeTagHygiene.ps1 +++ b/scripts/Invoke-CanarySmokeTagHygiene.ps1 @@ -57,13 +57,7 @@ $report = [ordered]@{ } try { - $releaseList = @(Invoke-GhJson -Arguments @( - 'release', 'list', - '-R', $Repository, - '--limit', '200', - '--exclude-drafts', - '--json', 'tagName,isPrerelease,publishedAt' - )) + $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) $report.releases_scanned = @($releaseList).Count $candidates = @() diff --git a/scripts/Invoke-OpsMonitoringSnapshot.ps1 b/scripts/Invoke-OpsMonitoringSnapshot.ps1 index 
f1a0e7d..c970a68 100644 --- a/scripts/Invoke-OpsMonitoringSnapshot.ps1 +++ b/scripts/Invoke-OpsMonitoringSnapshot.ps1 @@ -99,12 +99,25 @@ function Get-RunTimestampUtc { return [DateTimeOffset]::MinValue } +function Get-SyncGuardRuns { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter(Mandatory = $true)][string]$Workflow, + [Parameter(Mandatory = $true)][string]$Branch, + [Parameter(Mandatory = $true)][int]$Limit + ) + + return @(Get-GhWorkflowRunsPortable -Repository $Repository -Workflow $Workflow -Branch $Branch -Limit $Limit) +} + $reasonCodes = [System.Collections.Generic.List[string]]::new() $report = [ordered]@{ schema_version = '1.0' timestamp_utc = Get-UtcNowIso surface_repository = $SurfaceRepository required_runner_labels = @() + runner_visibility = 'available' + warnings = @() runner_summary = [ordered]@{ total = 0 online = 0 @@ -133,53 +146,62 @@ try { ) $report.required_runner_labels = $normalizedRequiredLabels - $runnerPayload = Invoke-GhJson -Arguments @('api', "repos/$SurfaceRepository/actions/runners?per_page=100") - $allRunners = @($runnerPayload.runners) + $allRunners = @() $onlineRunners = @() $eligibleRunners = @() + $runnerApiOutput = & gh api "repos/$SurfaceRepository/actions/runners?per_page=100" 2>&1 + $runnerApiExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($runnerApiExitCode -ne 0) { + $runnerApiText = [string]::Join([Environment]::NewLine, @($runnerApiOutput)) + if ($runnerApiText -match 'Resource not accessible by integration' -or $runnerApiText -match 'HTTP 403') { + $report.runner_visibility = 'forbidden' + $report.warnings = @('runner_visibility_unavailable') + } else { + throw "runner_api_query_failed: $runnerApiText" + } + } else { + $runnerJsonText = [string]::Join([Environment]::NewLine, @($runnerApiOutput)) + $runnerPayload = $runnerJsonText | ConvertFrom-Json -ErrorAction Stop + $allRunners = @($runnerPayload.runners) + } - foreach ($runner in $allRunners) { - $labelMap 
= @{} - foreach ($label in @($runner.labels)) { - $name = ([string]$label.name).ToLowerInvariant().Trim() - if (-not [string]::IsNullOrWhiteSpace($name)) { - $labelMap[$name] = $true + if ($report.runner_visibility -eq 'available') { + foreach ($runner in $allRunners) { + $labelMap = @{} + foreach ($label in @($runner.labels)) { + $name = ([string]$label.name).ToLowerInvariant().Trim() + if (-not [string]::IsNullOrWhiteSpace($name)) { + $labelMap[$name] = $true + } } - } - $runnerRecord = [ordered]@{ - name = [string]$runner.name - status = [string]$runner.status - busy = [bool]$runner.busy - labels = @($runner.labels | ForEach-Object { [string]$_.name }) - missing_required_labels = @($normalizedRequiredLabels | Where-Object { -not $labelMap.ContainsKey($_) }) - } + $runnerRecord = [ordered]@{ + name = [string]$runner.name + status = [string]$runner.status + busy = [bool]$runner.busy + labels = @($runner.labels | ForEach-Object { [string]$_.name }) + missing_required_labels = @($normalizedRequiredLabels | Where-Object { -not $labelMap.ContainsKey($_) }) + } - if ([string]$runner.status -eq 'online') { - $onlineRunners += $runnerRecord - if (@($runnerRecord.missing_required_labels).Count -eq 0) { - $eligibleRunners += $runnerRecord + if ([string]$runner.status -eq 'online') { + $onlineRunners += $runnerRecord + if (@($runnerRecord.missing_required_labels).Count -eq 0) { + $eligibleRunners += $runnerRecord + } } } - } - $report.runner_summary.total = @($allRunners).Count - $report.runner_summary.online = @($onlineRunners).Count - $report.runner_summary.eligible = @($eligibleRunners).Count - $report.eligible_runners = @($eligibleRunners) + $report.runner_summary.total = @($allRunners).Count + $report.runner_summary.online = @($onlineRunners).Count + $report.runner_summary.eligible = @($eligibleRunners).Count + $report.eligible_runners = @($eligibleRunners) - if (@($eligibleRunners).Count -eq 0) { - Add-ReasonCode -Target $reasonCodes -ReasonCode 'runner_unavailable' + 
if (@($eligibleRunners).Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'runner_unavailable' + } } - $syncRunsRaw = @(Invoke-GhJson -Arguments @( - 'run', 'list', - '-R', $SyncGuardRepository, - '--workflow', $SyncGuardWorkflow, - '--branch', $SyncGuardBranch, - '--limit', '25', - '--json', 'databaseId,status,conclusion,url,createdAt,headSha,event' - )) + $syncRunsRaw = @(Get-SyncGuardRuns -Repository $SyncGuardRepository -Workflow $SyncGuardWorkflow -Branch $SyncGuardBranch -Limit 25) $syncRuns = @($syncRunsRaw | Sort-Object { Get-RunTimestampUtc -Run $_ } -Descending) $latestRun = $null @@ -221,7 +243,11 @@ try { if ($reasonCodes.Count -eq 0) { $report.status = 'pass' $report.reason_codes = @('ok') - $report.message = 'Operations monitoring snapshot passed.' + if (@($report.warnings).Count -gt 0) { + $report.message = "Operations monitoring snapshot passed with warnings. warnings=$([string]::Join(',', @($report.warnings)))" + } else { + $report.message = 'Operations monitoring snapshot passed.' 
+ } } else { $report.status = 'fail' $report.reason_codes = @($reasonCodes) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 77765f3..d3607cd 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -168,13 +168,7 @@ function Invoke-ReleaseMode { ) $modeConfig = Get-ModeConfig -ModeName $ModeName - $releaseList = @(Invoke-GhJson -Arguments @( - 'release', 'list', - '-R', $Repository, - '--limit', '200', - '--exclude-drafts', - '--json', 'tagName,isPrerelease,publishedAt' - )) + $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) $records = @(Get-ReleaseRecordsForDate -ReleaseList $releaseList -DateKey $DateKey) $targetRangeRecords = @( diff --git a/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 new file mode 100644 index 0000000..7070cc9 --- /dev/null +++ b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 @@ -0,0 +1,89 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidateSet('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle')] + [string]$Mode = 'Validate', + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [ValidateRange(1, 10)] + [int]$KeepLatestCanaryN = 1, + + [Parameter()] + [switch]$IncludeOpsAutoRemediation, + + [Parameter()] + [switch]$RunContractTests, + + [Parameter()] + [switch]$DryRun, + + [Parameter()] + [switch]$AllowMutatingModes, + + [Parameter()] + [string]$OutputRoot = 'artifacts/release-control-plane-local', + + [Parameter()] + [string]$Image = 'ghcr.io/svelderrainruiz/labview-cdev-surface-ops:v1', + + [Parameter()] + 
[switch]$BuildLocalImage, + + [Parameter()] + [string]$LocalTag = 'labview-cdev-surface-ops:local', + + [Parameter()] + [switch]$HostFallback +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +$portableWrapper = Join-Path $PSScriptRoot 'Invoke-PortableOps.ps1' +if (-not (Test-Path -LiteralPath $portableWrapper -PathType Leaf)) { + throw "portable_wrapper_missing: $portableWrapper" +} + +$scriptArgs = @( + '-Repository', $Repository, + '-Branch', $Branch, + '-Mode', $Mode, + '-SyncGuardMaxAgeHours', [string]$SyncGuardMaxAgeHours, + '-KeepLatestCanaryN', [string]$KeepLatestCanaryN, + '-OutputRoot', $OutputRoot +) +if ($IncludeOpsAutoRemediation) { + $scriptArgs += '-IncludeOpsAutoRemediation' +} +if ($RunContractTests) { + $scriptArgs += '-RunContractTests' +} +if ($DryRun) { + $scriptArgs += '-DryRun' +} +if ($AllowMutatingModes) { + $scriptArgs += '-AllowMutatingModes' +} + +& $portableWrapper ` + -ScriptPath 'scripts/Exercise-ReleaseControlPlaneLocal.ps1' ` + -ScriptArguments $scriptArgs ` + -Image $Image ` + -BuildLocalImage:$BuildLocalImage ` + -LocalTag $LocalTag ` + -HostFallback:$HostFallback + +exit $LASTEXITCODE diff --git a/scripts/Watch-WorkflowRun.ps1 b/scripts/Watch-WorkflowRun.ps1 index bb8366f..b794a42 100644 --- a/scripts/Watch-WorkflowRun.ps1 +++ b/scripts/Watch-WorkflowRun.ps1 @@ -33,14 +33,7 @@ if ([string]::IsNullOrWhiteSpace($RunId)) { throw 'run_id_or_workflow_required: provide -RunId or both -WorkflowFile and -Branch.' } - $latest = @(Invoke-GhJson -Arguments @( - 'run', 'list', - '-R', $Repository, - '--workflow', $WorkflowFile, - '--branch', $Branch, - '--limit', '1', - '--json', 'databaseId,url,status,conclusion,createdAt' - )) | Select-Object -First 1 + $latest = @(Get-GhWorkflowRunsPortable -Repository $Repository -Workflow $WorkflowFile -Branch $Branch -Limit 1) | Select-Object -First 1 if ($null -eq $latest) { throw 'run_not_found: no workflow runs available to watch.' 
diff --git a/scripts/Write-OpsSloReport.ps1 b/scripts/Write-OpsSloReport.ps1 index 4654f65..21cd905 100644 --- a/scripts/Write-OpsSloReport.ps1 +++ b/scripts/Write-OpsSloReport.ps1 @@ -29,27 +29,7 @@ function Get-WorkflowSloSummary { [Parameter(Mandatory = $true)][DateTime]$CutoffUtc ) - $runListOutput = & gh run list ` - -R $Repository ` - --workflow $WorkflowName ` - --limit 100 ` - --json databaseId,status,conclusion,createdAt,url,event 2>&1 - $runListExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } - - $runs = @() - if ($runListExitCode -eq 0) { - $runListText = [string]::Join([Environment]::NewLine, @($runListOutput)) - if (-not [string]::IsNullOrWhiteSpace($runListText)) { - $runs = @($runListText | ConvertFrom-Json -ErrorAction Stop) - } - } else { - $runListErrorText = [string]::Join([Environment]::NewLine, @($runListOutput)) - if ($runListErrorText -match 'could not find any workflows named') { - $runs = @() - } else { - throw ("workflow_slo_query_failed: repository={0} workflow={1} error={2}" -f $Repository, $WorkflowName, $runListErrorText) - } - } + $runs = @(Get-GhWorkflowRunsPortable -Repository $Repository -Workflow $WorkflowName -Limit 100) $windowRuns = @( $runs | @@ -110,14 +90,7 @@ try { Get-WorkflowSloSummary -Repository $SurfaceRepository -WorkflowName 'release-control-plane' -CutoffUtc $cutoffUtc ) - $syncGuardRuns = @(Invoke-GhJson -Arguments @( - 'run', 'list', - '-R', $SyncGuardRepository, - '--workflow', 'fork-upstream-sync-guard', - '--branch', 'main', - '--limit', '100', - '--json', 'databaseId,status,conclusion,createdAt,url,event' - )) + $syncGuardRuns = @(Get-GhWorkflowRunsPortable -Repository $SyncGuardRepository -Workflow 'fork-upstream-sync-guard' -Branch 'main' -Limit 100) $syncGuardWindow = @( $syncGuardRuns | Where-Object { @@ -148,13 +121,7 @@ try { } } - $releases = @(Invoke-GhJson -Arguments @( - 'release', 'list', - '-R', $SurfaceRepository, - '--limit', '200', - '--exclude-drafts', - '--json', 
'tagName,isPrerelease,publishedAt' - )) + $releases = @(Get-GhReleasesPortable -Repository $SurfaceRepository -Limit 100 -ExcludeDrafts) $canaryReleases = @( $releases | Where-Object { diff --git a/scripts/lib/WorkflowOps.Common.ps1 b/scripts/lib/WorkflowOps.Common.ps1 index e4d9e83..dbbc312 100644 --- a/scripts/lib/WorkflowOps.Common.ps1 +++ b/scripts/lib/WorkflowOps.Common.ps1 @@ -98,6 +98,137 @@ function Convert-InputPairsToGhArgs { return ,$arguments } +function Test-WorkflowRunMatch { + param( + [Parameter(Mandatory = $true)][object]$Run, + [Parameter()][string]$Workflow = '' + ) + + if ([string]::IsNullOrWhiteSpace($Workflow)) { + return $true + } + + $token = ([string]$Workflow).Trim().ToLowerInvariant() + $runName = ([string]$Run.name).Trim().ToLowerInvariant() + $runPath = ([string]$Run.path).Trim().ToLowerInvariant() + if ($runName -eq $token) { + return $true + } + if ([string]::IsNullOrWhiteSpace($runPath)) { + return $false + } + + if ($runPath.Contains($token)) { + return $true + } + + if (-not $token.EndsWith('.yml') -and -not $token.EndsWith('.yaml')) { + if ($runPath.EndsWith("/$token.yml") -or $runPath.EndsWith("/$token.yaml")) { + return $true + } + } + + return $false +} + +function Get-GhWorkflowRunsPortable { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter()][string]$Workflow = '', + [Parameter()][string]$Branch = '', + [Parameter()][string]$Event = '', + [Parameter()][int]$Limit = 20 + ) + + $safeLimit = [Math]::Max(1, [Math]::Min($Limit, 100)) + $runsPayload = Invoke-GhJson -Arguments @( + 'api', + "repos/$Repository/actions/runs?per_page=100" + ) + $allRuns = @($runsPayload.workflow_runs) + if (@($allRuns).Count -eq 0) { + return @() + } + + $branchToken = ([string]$Branch).Trim().ToLowerInvariant() + $eventToken = ([string]$Event).Trim().ToLowerInvariant() + $records = @() + foreach ($run in $allRuns) { + if (-not (Test-WorkflowRunMatch -Run $run -Workflow $Workflow)) { + continue + } + + $runBranch = 
([string]$run.head_branch).Trim().ToLowerInvariant() + if (-not [string]::IsNullOrWhiteSpace($branchToken) -and $runBranch -ne $branchToken) { + continue + } + + $runEvent = ([string]$run.event).Trim().ToLowerInvariant() + if (-not [string]::IsNullOrWhiteSpace($eventToken) -and $runEvent -ne $eventToken) { + continue + } + + $records += [pscustomobject]@{ + databaseId = [string]$run.id + status = [string]$run.status + conclusion = [string]$run.conclusion + url = [string]$run.html_url + createdAt = [string]$run.created_at + updatedAt = [string]$run.updated_at + headSha = [string]$run.head_sha + event = [string]$run.event + workflowName = [string]$run.name + displayTitle = [string]$run.display_title + headBranch = [string]$run.head_branch + } + } + + return @( + $records | + Sort-Object { Parse-RunTimestamp -Run $_ } -Descending | + Select-Object -First $safeLimit + ) +} + +function Get-GhReleasesPortable { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter()][int]$Limit = 30, + [Parameter()][switch]$ExcludeDrafts + ) + + $safeLimit = [Math]::Max(1, [Math]::Min($Limit, 100)) + $releasePayload = Invoke-GhJson -Arguments @( + 'api', + "repos/$Repository/releases?per_page=100" + ) + $allReleases = @($releasePayload) + if (@($allReleases).Count -eq 0) { + return @() + } + + $records = @() + foreach ($release in $allReleases) { + $isDraft = [bool]$release.draft + if ($ExcludeDrafts -and $isDraft) { + continue + } + + $records += [pscustomobject]@{ + tagName = [string]$release.tag_name + isPrerelease = [bool]$release.prerelease + publishedAt = [string]$release.published_at + url = [string]$release.html_url + isDraft = $isDraft + } + } + + return @( + $records | + Select-Object -First $safeLimit + ) +} + function Parse-RunTimestamp { param([Parameter(Mandatory = $true)][object]$Run) diff --git a/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 index 95f5a17..4d8d6a2 100644 --- 
a/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 +++ b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 @@ -36,7 +36,7 @@ Describe 'Canary smoke tag hygiene workflow contract' { } It 'enforces keep-latest canary tag cleanup behavior' { - $script:scriptContent | Should -Match 'release''\s*,\s*''list''' + $script:scriptContent | Should -Match 'Get-GhReleasesPortable' $script:scriptContent | Should -Match 'release''\s*,\s*''delete''' $script:scriptContent | Should -Match '--cleanup-tag' $script:scriptContent | Should -Match 'KeepLatestN' diff --git a/tests/OpsMonitoringWorkflowContract.Tests.ps1 b/tests/OpsMonitoringWorkflowContract.Tests.ps1 index 3f95f28..33d26ec 100644 --- a/tests/OpsMonitoringWorkflowContract.Tests.ps1 +++ b/tests/OpsMonitoringWorkflowContract.Tests.ps1 @@ -35,8 +35,9 @@ Describe 'Ops monitoring workflow contract' { It 'checks runner and sync-guard health with deterministic reason codes' { $script:scriptContent | Should -Match 'repos/\$SurfaceRepository/actions/runners\?per_page=100' - $script:scriptContent | Should -Match 'run''\s*,\s*''list''' + $script:scriptContent | Should -Match 'Get-GhWorkflowRunsPortable' $script:scriptContent | Should -Match 'runner_unavailable' + $script:scriptContent | Should -Match 'runner_visibility_unavailable' $script:scriptContent | Should -Match 'sync_guard_failed' $script:scriptContent | Should -Match 'sync_guard_stale' $script:scriptContent | Should -Match 'sync_guard_missing' diff --git a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 new file mode 100644 index 0000000..0e5b34f --- /dev/null +++ b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 @@ -0,0 +1,44 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release control plane local Docker harness contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + 
$script:wrapperPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1' + $script:harnessPath = Join-Path $script:repoRoot 'scripts/Exercise-ReleaseControlPlaneLocal.ps1' + + if (-not (Test-Path -LiteralPath $script:wrapperPath -PathType Leaf)) { + throw "Local Docker wrapper missing: $script:wrapperPath" + } + if (-not (Test-Path -LiteralPath $script:harnessPath -PathType Leaf)) { + throw "Local Docker harness runtime missing: $script:harnessPath" + } + + $script:wrapperContent = Get-Content -LiteralPath $script:wrapperPath -Raw + $script:harnessContent = Get-Content -LiteralPath $script:harnessPath -Raw + } + + It 'wraps release control plane local harness through portable container runtime' { + $script:wrapperContent | Should -Match 'Invoke-PortableOps\.ps1' + $script:wrapperContent | Should -Match 'Exercise-ReleaseControlPlaneLocal\.ps1' + $script:wrapperContent | Should -Match 'ghcr\.io/svelderrainruiz/labview-cdev-surface-ops:v1' + $script:wrapperContent | Should -Match 'BuildLocalImage' + $script:wrapperContent | Should -Match 'HostFallback' + } + + It 'executes deterministic control-plane local steps and writes summary report' { + $script:harnessContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:harnessContent | Should -Match 'Invoke-OpsAutoRemediation\.ps1' + $script:harnessContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' + $script:harnessContent | Should -Match 'Write-OpsSloReport\.ps1' + $script:harnessContent | Should -Match 'release-control-plane-local-summary\.json' + } + + It 'guards mutating modes unless explicitly allowed' { + $script:harnessContent | Should -Match 'mutating_mode_blocked' + $script:harnessContent | Should -Match 'AllowMutatingModes' + $script:harnessContent | Should -Match 'DryRun' + } +} diff --git a/tests/VsCodeTasksContract.Tests.ps1 b/tests/VsCodeTasksContract.Tests.ps1 index 4e88283..cf74652 100644 --- a/tests/VsCodeTasksContract.Tests.ps1 +++ 
b/tests/VsCodeTasksContract.Tests.ps1 @@ -19,6 +19,7 @@ Describe 'VS Code tasks contract for portable workflow ops' { $script:labels | Should -Contain 'ops: cancel stale runs (portable)' $script:labels | Should -Contain 'ops: watch run (portable)' $script:labels | Should -Contain 'ops: runner queue snapshot (portable)' + $script:labels | Should -Contain 'ops: release control plane local (docker)' } It 'routes tasks through Invoke-PortableOps wrapper' { @@ -28,5 +29,6 @@ Describe 'VS Code tasks contract for portable workflow ops' { $raw | Should -Match 'Cancel-StaleWorkflowRuns\.ps1' $raw | Should -Match 'Watch-WorkflowRun\.ps1' $raw | Should -Match 'Get-RunnerQueueSnapshot\.ps1' + $raw | Should -Match 'Invoke-ReleaseControlPlaneLocalDocker\.ps1' } } diff --git a/tools/ops-runtime/Dockerfile b/tools/ops-runtime/Dockerfile index c8ae92f..3e6f6a4 100644 --- a/tools/ops-runtime/Dockerfile +++ b/tools/ops-runtime/Dockerfile @@ -4,4 +4,6 @@ RUN apt-get update \ && apt-get install -y --no-install-recommends git jq gh ca-certificates \ && rm -rf /var/lib/apt/lists/* +RUN pwsh -NoLogo -NoProfile -Command "Set-PSRepository -Name PSGallery -InstallationPolicy Trusted; Install-Module -Name Pester -Scope AllUsers -Force -MinimumVersion 5.5.0" + WORKDIR /workspace diff --git a/tools/ops-runtime/README.md b/tools/ops-runtime/README.md new file mode 100644 index 0000000..0e70e74 --- /dev/null +++ b/tools/ops-runtime/README.md @@ -0,0 +1,23 @@ +# Ops Runtime Container + +This container is the portable Docker package for local ops exercises. 
+ +Default image: +- `ghcr.io/svelderrainruiz/labview-cdev-surface-ops:v1` + +Build locally: + +```powershell +docker build -f .\tools\ops-runtime\Dockerfile -t labview-cdev-surface-ops:local .\tools\ops-runtime +``` + +Run the release control-plane local harness with this package: + +```powershell +pwsh -NoProfile -File .\scripts\Invoke-ReleaseControlPlaneLocalDocker.ps1 ` + -BuildLocalImage ` + -LocalTag labview-cdev-surface-ops:local ` + -Mode Validate ` + -DryRun ` + -RunContractTests +``` From dac281c5708eb0cc35fcfb71d33bd36553c885df Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 16:06:34 -0800 Subject: [PATCH 06/60] feat(ops): publish runtime image to ghcr --- .github/workflows/ci.yml | 1 + .../workflows/publish-ops-runtime-image.yml | 113 ++++++++++++++++++ README.md | 19 +++ scripts/Invoke-PortableOps.ps1 | 2 +- .../Invoke-ReleaseControlPlaneLocalDocker.ps1 | 2 +- ...timeImagePublishWorkflowContract.Tests.ps1 | 38 ++++++ ...lPlaneLocalDockerHarnessContract.Tests.ps1 | 2 +- tools/ops-runtime/README.md | 2 +- 8 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/publish-ops-runtime-image.yml create mode 100644 tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ca40b1..4e59fc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,6 +62,7 @@ jobs: './tests/CancelStaleWorkflowRunsContract.Tests.ps1', './tests/WatchWorkflowRunContract.Tests.ps1', './tests/PortableOpsRuntimeContract.Tests.ps1', + './tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1', './tests/VsCodeTasksContract.Tests.ps1', './tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1', './tests/UploadArtifactRetryCompositeContract.Tests.ps1', diff --git a/.github/workflows/publish-ops-runtime-image.yml b/.github/workflows/publish-ops-runtime-image.yml new file mode 100644 index 0000000..52c3dc1 --- /dev/null +++ 
b/.github/workflows/publish-ops-runtime-image.yml @@ -0,0 +1,113 @@ +name: publish-ops-runtime-image + +on: + workflow_dispatch: + inputs: + promote_v1: + description: Also refresh the v1 tag. + required: false + default: true + type: boolean + additional_tag: + description: Optional extra tag (for example canary or rc1). + required: false + default: '' + type: string + push: + branches: + - main + paths: + - tools/ops-runtime/Dockerfile + - scripts/Invoke-PortableOps.ps1 + - scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 + - tools/ops-runtime/README.md + +permissions: + contents: read + packages: write + +concurrency: + group: publish-ops-runtime-image-${{ github.ref }} + cancel-in-progress: false + +jobs: + publish: + name: Publish Ops Runtime Image + runs-on: ubuntu-latest + env: + IMAGE_REPO: ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Resolve deterministic tags + id: resolve + shell: bash + run: | + set -euo pipefail + + date_utc="$(date -u +%Y%m%d)" + short_sha="${GITHUB_SHA:0:12}" + promote_v1="${{ github.event.inputs.promote_v1 }}" + additional_tag="${{ github.event.inputs.additional_tag }}" + + if [[ -z "$promote_v1" ]]; then + promote_v1="true" + fi + + if [[ -n "$additional_tag" ]] && [[ ! 
"$additional_tag" =~ ^[A-Za-z0-9._-]+$ ]]; then + echo "additional_tag must match ^[A-Za-z0-9._-]+$" >&2 + exit 1 + fi + + tags=() + tags+=("${IMAGE_REPO}:sha-${short_sha}") + tags+=("${IMAGE_REPO}:v1-${date_utc}") + if [[ "$promote_v1" == "true" ]]; then + tags+=("${IMAGE_REPO}:v1") + fi + if [[ -n "$additional_tag" ]]; then + tags+=("${IMAGE_REPO}:${additional_tag}") + fi + + { + echo "date_utc=$date_utc" + echo "short_sha=$short_sha" + echo "tags<> "$GITHUB_OUTPUT" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push image + id: build + uses: docker/build-push-action@v6 + with: + context: ./tools/ops-runtime + file: ./tools/ops-runtime/Dockerfile + push: true + tags: ${{ steps.resolve.outputs.tags }} + + - name: Publish summary + shell: bash + run: | + { + echo "## Ops Runtime Image Published" + echo "" + echo "- Image: \`${IMAGE_REPO}\`" + echo "- Digest: \`${{ steps.build.outputs.digest }}\`" + echo "- Commit: \`${GITHUB_SHA}\`" + echo "- Tags:" + while IFS= read -r tag; do + echo " - \`$tag\`" + done <<< "${{ steps.resolve.outputs.tags }}" + } >> "$GITHUB_STEP_SUMMARY" diff --git a/README.md b/README.md index 98893de..bc38de0 100644 --- a/README.md +++ b/README.md @@ -341,10 +341,29 @@ pwsh -NoProfile -File .\scripts\Invoke-ReleaseControlPlaneLocalDocker.ps1 ` This executes `scripts/Exercise-ReleaseControlPlaneLocal.ps1` in the portable ops container image and writes artifacts under: - `artifacts\release-control-plane-local` +- Default container image: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1` For offline or container runtime fallback on the host: - add `-HostFallback` +## Publish Ops Runtime Image + +`publish-ops-runtime-image.yml` publishes the portable ops runtime container to: +- 
`ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` + +Deterministic tags: +- `sha-<12-char-commit>` +- `v1-YYYYMMDD` +- `v1` (when `promote_v1=true`) + +Manual publish: + +```powershell +gh workflow run publish-ops-runtime-image.yml ` + -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f promote_v1=true +``` + Runbook for incidents: - `docs/runbooks/release-ops-incident-response.md` diff --git a/scripts/Invoke-PortableOps.ps1 b/scripts/Invoke-PortableOps.ps1 index bf4cd64..d45763e 100644 --- a/scripts/Invoke-PortableOps.ps1 +++ b/scripts/Invoke-PortableOps.ps1 @@ -8,7 +8,7 @@ param( [string[]]$ScriptArguments = @(), [Parameter()] - [string]$Image = 'ghcr.io/svelderrainruiz/labview-cdev-surface-ops:v1', + [string]$Image = 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1', [Parameter()] [switch]$BuildLocalImage, diff --git a/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 index 7070cc9..ba1fa98 100644 --- a/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 +++ b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 @@ -37,7 +37,7 @@ param( [string]$OutputRoot = 'artifacts/release-control-plane-local', [Parameter()] - [string]$Image = 'ghcr.io/svelderrainruiz/labview-cdev-surface-ops:v1', + [string]$Image = 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1', [Parameter()] [switch]$BuildLocalImage, diff --git a/tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1 b/tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..02add24 --- /dev/null +++ b/tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1 @@ -0,0 +1,38 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops runtime image publish workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot 
'.github/workflows/publish-ops-runtime-image.yml' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Ops runtime publish workflow missing: $script:workflowPath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + } + + It 'supports manual dispatch and deterministic main-path publish triggers' { + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'tools/ops-runtime/Dockerfile' + $script:workflowContent | Should -Match 'Invoke-PortableOps\.ps1' + $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlaneLocalDocker\.ps1' + } + + It 'publishes to GHCR with package write permission' { + $script:workflowContent | Should -Match 'packages:\s*write' + $script:workflowContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-surface-ops' + $script:workflowContent | Should -Match 'docker/login-action@v3' + $script:workflowContent | Should -Match 'docker/build-push-action@v6' + } + + It 'derives immutable tags and reports pushed digest' { + $script:workflowContent | Should -Match 'sha-\$\{short_sha\}' + $script:workflowContent | Should -Match 'v1-\$\{date_utc\}' + $script:workflowContent | Should -Match 'steps\.build\.outputs\.digest' + } +} diff --git a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 index 0e5b34f..8456af8 100644 --- a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 @@ -23,7 +23,7 @@ Describe 'Release control plane local Docker harness contract' { It 'wraps release control plane local harness through portable container runtime' { $script:wrapperContent | Should -Match 'Invoke-PortableOps\.ps1' $script:wrapperContent | Should -Match 'Exercise-ReleaseControlPlaneLocal\.ps1' - $script:wrapperContent | Should 
-Match 'ghcr\.io/svelderrainruiz/labview-cdev-surface-ops:v1' + $script:wrapperContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-surface-ops:v1' $script:wrapperContent | Should -Match 'BuildLocalImage' $script:wrapperContent | Should -Match 'HostFallback' } diff --git a/tools/ops-runtime/README.md b/tools/ops-runtime/README.md index 0e70e74..a3a466b 100644 --- a/tools/ops-runtime/README.md +++ b/tools/ops-runtime/README.md @@ -3,7 +3,7 @@ This container is the portable Docker package for local ops exercises. Default image: -- `ghcr.io/svelderrainruiz/labview-cdev-surface-ops:v1` +- `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1` Build locally: From 7f281d840a7d64870f515c9ed80d9d91407103d2 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:06:10 -0800 Subject: [PATCH 07/60] feat: add container NSIS self-test and linux parity image publish (#6) Co-authored-by: svelderrainruiz --- .github/workflows/ci.yml | 3 + .../publish-linux-nsis-parity-image.yml | 114 +++++ AGENTS.md | 2 + README.md | 39 ++ nsis/workspace-bootstrap-installer.nsi | 7 +- scripts/Build-WorkspaceBootstrapInstaller.ps1 | 8 + scripts/Install-WorkspaceFromManifest.ps1 | 404 +++++++++-------- scripts/Invoke-LinuxContainerNsisParity.ps1 | 275 ++++++++++++ .../Invoke-WindowsContainerNsisSelfTest.ps1 | 410 ++++++++++++++++++ ...uild-WorkspaceBootstrapInstaller.Tests.ps1 | 2 + ...LinuxContainerNsisParityContract.Tests.ps1 | 53 +++ ...rityImagePublishWorkflowContract.Tests.ps1 | 38 ++ ...owsContainerNsisSelfTestContract.Tests.ps1 | 62 +++ .../WorkspaceInstallRuntimeContract.Tests.ps1 | 5 + tests/WorkspaceSurfaceContract.Tests.ps1 | 13 + tools/nsis-selftest-linux/Dockerfile | 35 ++ tools/nsis-selftest-linux/README.md | 28 ++ tools/nsis-selftest-windows/Dockerfile | 6 + tools/nsis-selftest-windows/README.md | 30 ++ 19 files changed, 1356 insertions(+), 178 deletions(-) create mode 100644 .github/workflows/publish-linux-nsis-parity-image.yml create mode 
100644 scripts/Invoke-LinuxContainerNsisParity.ps1 create mode 100644 scripts/Invoke-WindowsContainerNsisSelfTest.ps1 create mode 100644 tests/LinuxContainerNsisParityContract.Tests.ps1 create mode 100644 tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1 create mode 100644 tests/WindowsContainerNsisSelfTestContract.Tests.ps1 create mode 100644 tools/nsis-selftest-linux/Dockerfile create mode 100644 tools/nsis-selftest-linux/README.md create mode 100644 tools/nsis-selftest-windows/Dockerfile create mode 100644 tools/nsis-selftest-windows/README.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e59fc0..4946bcf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,6 +63,7 @@ jobs: './tests/WatchWorkflowRunContract.Tests.ps1', './tests/PortableOpsRuntimeContract.Tests.ps1', './tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1', + './tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1', './tests/VsCodeTasksContract.Tests.ps1', './tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1', './tests/UploadArtifactRetryCompositeContract.Tests.ps1', @@ -77,6 +78,8 @@ jobs: './tests/LinuxLabviewImageGateWorkflowContract.Tests.ps1', './tests/ReleaseWithWindowsGateWorkflowContract.Tests.ps1', './tests/DockerDesktopLinuxIterationContract.Tests.ps1', + './tests/LinuxContainerNsisParityContract.Tests.ps1', + './tests/WindowsContainerNsisSelfTestContract.Tests.ps1', './tests/WorkspaceInstallerExerciseContract.Tests.ps1', './tests/WorkspaceInstallerIterationContract.Tests.ps1', './tests/ScopeAOpsRunbookContract.Tests.ps1' diff --git a/.github/workflows/publish-linux-nsis-parity-image.yml b/.github/workflows/publish-linux-nsis-parity-image.yml new file mode 100644 index 0000000..0fe0863 --- /dev/null +++ b/.github/workflows/publish-linux-nsis-parity-image.yml @@ -0,0 +1,114 @@ +name: publish-linux-nsis-parity-image + +on: + workflow_dispatch: + inputs: + promote_latest: + description: Also refresh the latest 
tag. + required: false + default: false + type: boolean + additional_tag: + description: Optional extra tag (for example canary or rc1). + required: false + default: '' + type: string + push: + branches: + - main + paths: + - tools/nsis-selftest-linux/Dockerfile + - tools/nsis-selftest-linux/README.md + - scripts/Invoke-LinuxContainerNsisParity.ps1 + - .github/workflows/publish-linux-nsis-parity-image.yml + +permissions: + contents: read + packages: write + +concurrency: + group: publish-linux-nsis-parity-image-${{ github.ref }} + cancel-in-progress: false + +jobs: + publish: + name: Publish Linux NSIS Parity Image + runs-on: ubuntu-latest + env: + IMAGE_REPO: ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-linux-parity + BASE_TAG: 2026q1-linux + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Resolve deterministic tags + id: resolve + shell: bash + run: | + set -euo pipefail + + date_utc="$(date -u +%Y%m%d)" + short_sha="${GITHUB_SHA:0:12}" + promote_latest="${{ github.event.inputs.promote_latest }}" + additional_tag="${{ github.event.inputs.additional_tag }}" + + if [[ -z "$promote_latest" ]]; then + promote_latest="false" + fi + + if [[ -n "$additional_tag" ]] && [[ ! 
"$additional_tag" =~ ^[A-Za-z0-9._-]+$ ]]; then + echo "additional_tag must match ^[A-Za-z0-9._-]+$" >&2 + exit 1 + fi + + tags=() + tags+=("${IMAGE_REPO}:sha-${short_sha}") + tags+=("${IMAGE_REPO}:${BASE_TAG}-${date_utc}") + if [[ "$promote_latest" == "true" ]]; then + tags+=("${IMAGE_REPO}:latest") + fi + if [[ -n "$additional_tag" ]]; then + tags+=("${IMAGE_REPO}:${additional_tag}") + fi + + { + echo "date_utc=$date_utc" + echo "short_sha=$short_sha" + echo "tags<> "$GITHUB_OUTPUT" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push image + id: build + uses: docker/build-push-action@v6 + with: + context: ./tools/nsis-selftest-linux + file: ./tools/nsis-selftest-linux/Dockerfile + push: true + tags: ${{ steps.resolve.outputs.tags }} + + - name: Publish summary + shell: bash + run: | + { + echo "## Linux NSIS Parity Image Published" + echo "" + echo "- Image: \`${IMAGE_REPO}\`" + echo "- Digest: \`${{ steps.build.outputs.digest }}\`" + echo "- Commit: \`${GITHUB_SHA}\`" + echo "- Tags:" + while IFS= read -r tag; do + echo " - \`$tag\`" + done <<< "${{ steps.resolve.outputs.tags }}" + } >> "$GITHUB_STEP_SUMMARY" diff --git a/AGENTS.md b/AGENTS.md index a5c2e76..4c0d709 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -241,6 +241,8 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `-Mode full` for isolated smoke install validation. - `-Watch` to auto-rerun on contract file changes without manual restarts. - Use `scripts/Invoke-DockerDesktopLinuxIteration.ps1 -DockerContext desktop-linux` for Docker Desktop Linux command-surface checks (`runner-cli --help`, `runner-cli ppl --help`) before full Windows LabVIEW image runs. 
+- Use `scripts/Invoke-WindowsContainerNsisSelfTest.ps1` to build the workspace NSIS installer and run silent install (`/S`) inside the same Windows container with `ContainerSmoke` execution context; this image is aligned to `nationalinstruments/labview:2026q1-windows` and fails fast with `windows_container_mode_required` if Docker is not in Windows container mode. +- Use `scripts/Invoke-LinuxContainerNsisParity.ps1 -DockerContext desktop-linux` for parity checks aligned to `nationalinstruments/labview:2026q1-linux`; this lane compiles NSIS smoke output but does not execute Windows installers on Linux. - Use `scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1` for local containerized release-control-plane exercise (`Validate` + `DryRun` default). - If Docker Desktop Linux context is unavailable, confirm `Microsoft-Hyper-V-All`, `VirtualMachinePlatform`, and `Microsoft-Windows-Subsystem-Linux` are enabled, then reboot before retrying. - Use `scripts/Test-RunnerCliBundleDeterminism.ps1` and `scripts/Test-WorkspaceInstallerDeterminism.ps1` locally before proposing release-tag publication. diff --git a/README.md b/README.md index bc38de0..00f521b 100644 --- a/README.md +++ b/README.md @@ -199,6 +199,45 @@ pwsh -NoProfile -File .\scripts\Invoke-DockerDesktopLinuxIteration.ps1 ` This lane bundles manifest-pinned `runner-cli` for `linux-x64`, runs `runner-cli --help` and `runner-cli ppl --help` inside the container, and optionally executes core Pester contract tests. If Docker Desktop cannot start, verify Windows virtualization features are enabled (`Microsoft-Hyper-V-All`, `VirtualMachinePlatform`, `Microsoft-Windows-Subsystem-Linux`) and reboot after feature changes. +## Windows container NSIS self-test + +Build the NSIS self-test image (optional) and run a full build + silent install in the same Windows container. 
+The runtime is aligned to `nationalinstruments/labview:2026q1-windows`: + +```powershell +pwsh -NoProfile -File .\scripts\Invoke-WindowsContainerNsisSelfTest.ps1 ` + -BuildLocalImage ` + -Image labview-cdev-surface-nsis-selftest:local +``` + +This wrapper fails fast with `windows_container_mode_required` unless Docker reports `OSType=windows`. + +Outputs are written under: +- `artifacts\release\windows-container-nsis-selftest` +- `container-report.json` +- `windows-container-nsis-selftest-report.json` + +## Linux NSIS parity container + +Use the Linux parity runtime aligned to `nationalinstruments/labview:2026q1-linux`: + +```powershell +pwsh -NoProfile -File .\scripts\Invoke-LinuxContainerNsisParity.ps1 ` + -BuildLocalImage ` + -Image labview-cdev-surface-nsis-linux-parity:local ` + -DockerContext desktop-linux +``` + +This lane validates Linux toolchain parity (`labviewcli`, `pwsh`, `dotnet`, `git`, `makensis`) and compiles a minimal NSIS smoke installer. +Installer execution is intentionally skipped on Linux (`windows_installer_not_executable_on_linux`). +The parity image uses an apt-driven dependency model aligned to NI's Linux custom-image guidance (`labview-for-containers/docs/linux-custom-images.md`). 
+ +Publish the Linux parity image to GHCR with deterministic tags: +- Workflow: `.github/workflows/publish-linux-nsis-parity-image.yml` +- Image repo: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-linux-parity` +- Default tags: `sha-<12-char-commit>`, `2026q1-linux-<YYYYMMDD>` +- Optional manual tags: `latest` (`promote_latest=true`) and `additional_tag` + ## Publish Release (Automated Gate) Use manual workflow dispatch for release publication: diff --git a/nsis/workspace-bootstrap-installer.nsi b/nsis/workspace-bootstrap-installer.nsi index 7200411..46a8f6b 100644 --- a/nsis/workspace-bootstrap-installer.nsi +++ b/nsis/workspace-bootstrap-installer.nsi @@ -39,6 +39,10 @@ Name "LVIE Cdev Workspace Bootstrap" !define REQUIRED_LABVIEW_YEAR "2020" !endif +!ifndef INSTALL_EXEC_CONTEXT + !define INSTALL_EXEC_CONTEXT "NsisInstall" +!endif + !ifndef X86_NIPKG_ENV !define X86_NIPKG_ENV "LVIE_LABVIEW_X86_NIPKG_INSTALL_CMD" !endif @@ -71,6 +75,7 @@ Section "Install" FileWrite $2 "manifest=$INSTDIR\${MANIFEST_REL}$\r$\n" FileWrite $2 "report=${WORKSPACE_ROOT}\${REPORT_REL}$\r$\n" FileWrite $2 "powershell_exe=$1$\r$\n" + FileWrite $2 "install_execution_context=${INSTALL_EXEC_CONTEXT}$\r$\n" FileWrite $2 "required_labview_year=${REQUIRED_LABVIEW_YEAR}$\r$\n" FileClose $2 @@ -131,7 +136,7 @@ Section "Install" Abort labview_x86_ready: - ExecWait '"$SYSDIR\cmd.exe" /c ""$1" -NoLogo -NoProfile -NonInteractive -ExecutionPolicy Bypass -File "$INSTDIR\${INSTALL_SCRIPT_REL}" -WorkspaceRoot "${WORKSPACE_ROOT}" -ManifestPath "$INSTDIR\${MANIFEST_REL}" -Mode Install -InstallerExecutionContext NsisInstall -OutputPath "${WORKSPACE_ROOT}\${REPORT_REL}" >> "${WORKSPACE_ROOT}\${LAUNCH_LOG_REL}" 2>&1"' $0 + ExecWait '"$SYSDIR\cmd.exe" /c ""$1" -NoLogo -NoProfile -NonInteractive -ExecutionPolicy Bypass -File "$INSTDIR\${INSTALL_SCRIPT_REL}" -WorkspaceRoot "${WORKSPACE_ROOT}" -ManifestPath "$INSTDIR\${MANIFEST_REL}" -Mode Install -InstallerExecutionContext ${INSTALL_EXEC_CONTEXT} -OutputPath
"${WORKSPACE_ROOT}\${REPORT_REL}" >> "${WORKSPACE_ROOT}\${LAUNCH_LOG_REL}" 2>&1"' $0 FileOpen $2 "${WORKSPACE_ROOT}\${LAUNCH_LOG_REL}" a FileWrite $2 "exit_code=$0$\r$\n" FileClose $2 diff --git a/scripts/Build-WorkspaceBootstrapInstaller.ps1 b/scripts/Build-WorkspaceBootstrapInstaller.ps1 index 14b1fad..1b04be5 100644 --- a/scripts/Build-WorkspaceBootstrapInstaller.ps1 +++ b/scripts/Build-WorkspaceBootstrapInstaller.ps1 @@ -12,6 +12,10 @@ param( [Parameter(Mandatory = $false)] [string]$RequiredLabviewYear = '2020', + [Parameter(Mandatory = $false)] + [ValidateSet('NsisInstall', 'LocalInstallerExercise', 'ContainerSmoke')] + [string]$InstallerExecutionContext = 'NsisInstall', + [Parameter(Mandatory = $false)] [string]$NsisScriptPath, @@ -206,6 +210,7 @@ function Invoke-NsisBuild { [Parameter(Mandatory = $true)][string]$OutputInstallerPath, [Parameter(Mandatory = $true)][string]$WorkspaceRoot, [Parameter(Mandatory = $true)][string]$LabviewYear, + [Parameter(Mandatory = $true)][string]$InstallExecutionContext, [Parameter(Mandatory = $true)][bool]$DeterministicBuild, [Parameter(Mandatory = $true)][long]$EpochSeconds ) @@ -216,6 +221,7 @@ function Invoke-NsisBuild { ("/DPAYLOAD_DIR=$StagedPayloadPath"), ("/DWORKSPACE_ROOT=$WorkspaceRoot"), ("/DREQUIRED_LABVIEW_YEAR=$LabviewYear"), + ("/DINSTALL_EXEC_CONTEXT=$InstallExecutionContext"), ("/DSOURCE_DATE_EPOCH=$EpochSeconds"), $ScriptPathResolved ) @@ -276,6 +282,7 @@ try { -OutputInstallerPath $outputPath ` -WorkspaceRoot $WorkspaceRootDefault ` -LabviewYear $RequiredLabviewYear ` + -InstallExecutionContext $InstallerExecutionContext ` -DeterministicBuild $Deterministic ` -EpochSeconds $epoch $hash = Get-Sha256Hex -Path $outputPath @@ -318,6 +325,7 @@ try { -OutputInstallerPath $resolvedOutputPath ` -WorkspaceRoot $WorkspaceRootDefault ` -LabviewYear $RequiredLabviewYear ` + -InstallExecutionContext $InstallerExecutionContext ` -DeterministicBuild $Deterministic ` -EpochSeconds $epoch diff --git 
a/scripts/Install-WorkspaceFromManifest.ps1 b/scripts/Install-WorkspaceFromManifest.ps1 index d6ab2ad..f5479c5 100644 --- a/scripts/Install-WorkspaceFromManifest.ps1 +++ b/scripts/Install-WorkspaceFromManifest.ps1 @@ -1140,6 +1140,7 @@ $governanceAudit = [ordered]@{ message = '' } $postActionSequence = New-Object System.Collections.ArrayList +$isContainerSmokeInstall = ($Mode -eq 'Install' -and [string]$InstallerExecutionContext -eq 'ContainerSmoke') $contractSplit = [ordered]@{ execution_profile = 'host-release' skip_vip_harness = $false @@ -1190,16 +1191,24 @@ try { } foreach ($commandName in @('git', 'gh', 'g-cli')) { + $isRequiredCommand = $true + if ($isContainerSmokeInstall) { + $isRequiredCommand = $false + } $cmd = Get-Command $commandName -ErrorAction SilentlyContinue $check = [ordered]@{ command = $commandName - required = $true + required = $isRequiredCommand present = To-Bool ($null -ne $cmd) path = if ($null -ne $cmd) { $cmd.Source } else { '' } } $dependencyChecks += [pscustomobject]$check if (-not $check.present) { - $errors += "Required command '$commandName' was not found on PATH." + if ($check.required) { + $errors += "Required command '$commandName' was not found on PATH." + } else { + $warnings += "Optional command '$commandName' was not found on PATH for ContainerSmoke context." + } } } @@ -1215,6 +1224,10 @@ try { $offlineGitModeRaw = [string]$env:LVIE_OFFLINE_GIT_MODE $offlineGitMode = ($offlineGitModeRaw -match '^(1|true|yes)$') + if ($isContainerSmokeInstall) { + $offlineGitMode = $true + Write-InstallerFeedback -Message 'ContainerSmoke context forces LVIE_OFFLINE_GIT_MODE behavior.' + } if ($offlineGitMode) { Write-InstallerFeedback -Message 'LVIE_OFFLINE_GIT_MODE is enabled; git network fetch/clone operations will be skipped.' 
} @@ -1438,10 +1451,10 @@ try { if ($Mode -eq 'Install') { if ([string]::IsNullOrWhiteSpace($InstallerExecutionContext)) { - throw "Install mode requires -InstallerExecutionContext NsisInstall (or LocalInstallerExercise)." + throw "Install mode requires -InstallerExecutionContext NsisInstall (or LocalInstallerExercise or ContainerSmoke)." } - if ($InstallerExecutionContext -notin @('NsisInstall', 'LocalInstallerExercise')) { - throw "Unsupported execution context '$InstallerExecutionContext'. Expected NsisInstall or LocalInstallerExercise." + if ($InstallerExecutionContext -notin @('NsisInstall', 'LocalInstallerExercise', 'ContainerSmoke')) { + throw "Unsupported execution context '$InstallerExecutionContext'. Expected NsisInstall, LocalInstallerExercise, or ContainerSmoke." } } @@ -1474,10 +1487,34 @@ try { $repoTotal = @($manifest.managed_repos).Count $repoIndex = 0 - foreach ($repo in @($manifest.managed_repos)) { - $repoIndex++ - $repoPath = [string]$repo.path - $repoName = [string]$repo.repo_name + if ($isContainerSmokeInstall) { + foreach ($repo in @($manifest.managed_repos)) { + $repositoryResults += [pscustomobject]([ordered]@{ + path = [string]$repo.path + repo_name = [string]$repo.repo_name + required_gh_repo = [string]$repo.required_gh_repo + default_branch = [string]$repo.default_branch + pinned_sha = ([string]$repo.pinned_sha).ToLowerInvariant() + exists_before = $false + action = 'container_smoke_skip' + status = 'skipped' + issues = @() + message = 'ContainerSmoke context skips repository contract enforcement.' + remote_checks = @() + head_sha = '' + branch_state = 'skipped' + }) + } + Add-PostActionSequenceEntry ` + -Sequence $postActionSequence ` + -Phase 'repository-contracts' ` + -Status 'skipped' ` + -Message ("Skipped repository contract checks for ContainerSmoke context ({0} repos)." 
-f $repoTotal) + } else { + foreach ($repo in @($manifest.managed_repos)) { + $repoIndex++ + $repoPath = [string]$repo.path + $repoName = [string]$repo.repo_name $defaultBranch = [string]$repo.default_branch $requiredGhRepo = [string]$repo.required_gh_repo $pinnedSha = ([string]$repo.pinned_sha).ToLowerInvariant() @@ -1662,21 +1699,22 @@ try { $errors += "$repoPath :: $($repoResult.message)" } - $repositoryResults += [pscustomobject]$repoResult - } - $repoFailureCount = @($repositoryResults | Where-Object { [string]$_.status -ne 'pass' }).Count - if ($repoFailureCount -eq 0) { - Add-PostActionSequenceEntry ` - -Sequence $postActionSequence ` - -Phase 'repository-contracts' ` - -Status 'pass' ` - -Message ("Verified repository contract for {0} managed repos." -f $repoTotal) - } else { - Add-PostActionSequenceEntry ` - -Sequence $postActionSequence ` - -Phase 'repository-contracts' ` - -Status 'fail' ` - -Message ("Repository contract checks failed for {0} managed repos." -f $repoFailureCount) + $repositoryResults += [pscustomobject]$repoResult + } + $repoFailureCount = @($repositoryResults | Where-Object { [string]$_.status -ne 'pass' }).Count + if ($repoFailureCount -eq 0) { + Add-PostActionSequenceEntry ` + -Sequence $postActionSequence ` + -Phase 'repository-contracts' ` + -Status 'pass' ` + -Message ("Verified repository contract for {0} managed repos." -f $repoTotal) + } else { + Add-PostActionSequenceEntry ` + -Sequence $postActionSequence ` + -Phase 'repository-contracts' ` + -Status 'fail' ` + -Message ("Repository contract checks failed for {0} managed repos." -f $repoFailureCount) + } } Write-InstallerFeedback -Message 'Syncing governance payload into workspace root.' 
@@ -1847,66 +1885,63 @@ try { -Status ([string]$cliBundle.status) ` -Message ([string]$cliBundle.message) - $originalWorktreeRoot = $env:LVIE_WORKTREE_ROOT - $effectiveWorktreeRoot = if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { $resolvedWorkspaceRoot } else { $originalWorktreeRoot } - $worktreeRootOverridden = $false - if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { - if (-not [string]::IsNullOrWhiteSpace($effectiveWorktreeRoot)) { - Write-InstallerFeedback -Message ("Setting LVIE_WORKTREE_ROOT to workspace root for post-actions: {0}" -f $effectiveWorktreeRoot) - $worktreeRootOverridden = $true + if ($isContainerSmokeInstall) { + foreach ($bitness in $requiredPplBitnesses) { + $pplCapabilityChecks[$bitness].status = 'skipped' + $pplCapabilityChecks[$bitness].message = 'ContainerSmoke context skips runner-cli PPL capability gates.' + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status 'skipped' -Message ([string]$pplCapabilityChecks[$bitness].message + ) } + $vipPackageBuildCheck.status = 'skipped' + $vipPackageBuildCheck.message = 'ContainerSmoke context skips runner-cli VIP harness gate.' 
+ Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message ([string]$vipPackageBuildCheck.message) } else { - Write-InstallerFeedback -Message ("Using existing LVIE_WORKTREE_ROOT for post-actions: {0}" -f $effectiveWorktreeRoot) - } - - try { - if (-not [string]::IsNullOrWhiteSpace($effectiveWorktreeRoot)) { - $env:LVIE_WORKTREE_ROOT = $effectiveWorktreeRoot + $originalWorktreeRoot = $env:LVIE_WORKTREE_ROOT + $effectiveWorktreeRoot = if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { $resolvedWorkspaceRoot } else { $originalWorktreeRoot } + $worktreeRootOverridden = $false + if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { + if (-not [string]::IsNullOrWhiteSpace($effectiveWorktreeRoot)) { + Write-InstallerFeedback -Message ("Setting LVIE_WORKTREE_ROOT to workspace root for post-actions: {0}" -f $effectiveWorktreeRoot) + $worktreeRootOverridden = $true + } + } else { + Write-InstallerFeedback -Message ("Using existing LVIE_WORKTREE_ROOT for post-actions: {0}" -f $effectiveWorktreeRoot) } - if ($runnerCliBundle.status -eq 'pass') { - $repoContractStatus = $repositoryResults | Where-Object { [string]$_.path -eq $iconEditorRepoPath } | Select-Object -First 1 - if ($null -eq $repoContractStatus -or [string]$repoContractStatus.status -ne 'pass') { - $blockingMessage = "Cannot run runner-cli PPL capability checks because icon-editor repo contract failed at '$iconEditorRepoPath'." - foreach ($bitness in $requiredPplBitnesses) { - $pplCapabilityChecks[$bitness].status = 'fail' - $pplCapabilityChecks[$bitness].message = $blockingMessage - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status 'fail' -Message $blockingMessage - } - if ($skipVipHarness) { - $vipPackageBuildCheck.status = 'skipped' - $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." 
- Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message + try { + if (-not [string]::IsNullOrWhiteSpace($effectiveWorktreeRoot)) { + $env:LVIE_WORKTREE_ROOT = $effectiveWorktreeRoot + } + + if ($runnerCliBundle.status -eq 'pass') { + $repoContractStatus = $repositoryResults | Where-Object { [string]$_.path -eq $iconEditorRepoPath } | Select-Object -First 1 + if ($null -eq $repoContractStatus -or [string]$repoContractStatus.status -ne 'pass') { + $blockingMessage = "Cannot run runner-cli PPL capability checks because icon-editor repo contract failed at '$iconEditorRepoPath'." + foreach ($bitness in $requiredPplBitnesses) { + $pplCapabilityChecks[$bitness].status = 'fail' + $pplCapabilityChecks[$bitness].message = $blockingMessage + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status 'fail' -Message $blockingMessage + } + if ($skipVipHarness) { + $vipPackageBuildCheck.status = 'skipped' + $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message + } else { + $vipPackageBuildCheck.status = 'blocked' + $vipPackageBuildCheck.message = 'VIP harness was not run because icon-editor repository contract failed.' + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message + } + $errors += $blockingMessage } else { - $vipPackageBuildCheck.status = 'blocked' - $vipPackageBuildCheck.message = 'VIP harness was not run because icon-editor repository contract failed.' 
- Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message - } - $errors += $blockingMessage - } else { - $allPplPass = $true - foreach ($bitness in $requiredPplBitnesses) { - Write-InstallerFeedback -Message ("Running pre-PPL LabVIEW close sweep before {0}-bit gate." -f $bitness) - Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath - Start-Sleep -Seconds 3 - - try { - Write-InstallerFeedback -Message ("Running runner-cli PPL capability gate ({0}-bit)." -f $bitness) - $capabilityResult = Invoke-RunnerCliPplCapabilityCheck ` - -RunnerCliPath $runnerCliExePath ` - -IconEditorRepoPath $iconEditorRepoPath ` - -PinnedSha $iconEditorPinnedSha ` - -RequiredLabviewYear ([string]$requiredLabviewYear) ` - -ExpectedExecutionLabviewYear ([string]$pplExpectedExecutionLabviewYear) ` - -RequiredBitness $bitness - - if ([string]$capabilityResult.status -ne 'pass') { - $firstAttemptMessage = [string]$capabilityResult.message - Write-InstallerFeedback -Message ("runner-cli PPL capability gate ({0}-bit) failed on first attempt; retrying once after additional LabVIEW cleanup." -f $bitness) - Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath - Start-Sleep -Seconds 5 + $allPplPass = $true + foreach ($bitness in $requiredPplBitnesses) { + Write-InstallerFeedback -Message ("Running pre-PPL LabVIEW close sweep before {0}-bit gate." -f $bitness) + Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath + Start-Sleep -Seconds 3 - $retryCapabilityResult = Invoke-RunnerCliPplCapabilityCheck ` + try { + Write-InstallerFeedback -Message ("Running runner-cli PPL capability gate ({0}-bit)." 
-f $bitness) + $capabilityResult = Invoke-RunnerCliPplCapabilityCheck ` -RunnerCliPath $runnerCliExePath ` -IconEditorRepoPath $iconEditorRepoPath ` -PinnedSha $iconEditorPinnedSha ` @@ -1914,119 +1949,134 @@ try { -ExpectedExecutionLabviewYear ([string]$pplExpectedExecutionLabviewYear) ` -RequiredBitness $bitness - if ([string]$retryCapabilityResult.status -eq 'pass') { - $retryCapabilityResult.message = ("{0} (passed on retry after additional cleanup)." -f [string]$retryCapabilityResult.message) - $capabilityResult = $retryCapabilityResult - } else { - $retryCapabilityResult.message = ("First attempt: {0} Retry attempt: {1}" -f $firstAttemptMessage, [string]$retryCapabilityResult.message) - $capabilityResult = $retryCapabilityResult + if ([string]$capabilityResult.status -ne 'pass') { + $firstAttemptMessage = [string]$capabilityResult.message + Write-InstallerFeedback -Message ("runner-cli PPL capability gate ({0}-bit) failed on first attempt; retrying once after additional LabVIEW cleanup." -f $bitness) + Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath + Start-Sleep -Seconds 5 + + $retryCapabilityResult = Invoke-RunnerCliPplCapabilityCheck ` + -RunnerCliPath $runnerCliExePath ` + -IconEditorRepoPath $iconEditorRepoPath ` + -PinnedSha $iconEditorPinnedSha ` + -RequiredLabviewYear ([string]$requiredLabviewYear) ` + -ExpectedExecutionLabviewYear ([string]$pplExpectedExecutionLabviewYear) ` + -RequiredBitness $bitness + + if ([string]$retryCapabilityResult.status -eq 'pass') { + $retryCapabilityResult.message = ("{0} (passed on retry after additional cleanup)." 
-f [string]$retryCapabilityResult.message) + $capabilityResult = $retryCapabilityResult + } else { + $retryCapabilityResult.message = ("First attempt: {0} Retry attempt: {1}" -f $firstAttemptMessage, [string]$retryCapabilityResult.message) + $capabilityResult = $retryCapabilityResult + } } - } - $pplCapabilityChecks[$bitness] = [ordered]@{ - status = [string]$capabilityResult.status - message = [string]$capabilityResult.message - runner_cli_path = [string]$capabilityResult.runner_cli_path - repo_path = [string]$capabilityResult.repo_path - required_labview_year = [string]$capabilityResult.required_labview_year - expected_execution_labview_year = [string]$capabilityResult.expected_execution_labview_year - required_bitness = [string]$capabilityResult.required_bitness - output_ppl_path = [string]$capabilityResult.output_ppl_path - output_ppl_snapshot_path = [string]$capabilityResult.output_ppl_snapshot_path - command = @($capabilityResult.command) - exit_code = $capabilityResult.exit_code - labview_install_root = [string]$capabilityResult.labview_install_root - labview_ini_path = [string]$capabilityResult.labview_ini_path - expected_labview_cli_port = $capabilityResult.expected_labview_cli_port - buildspec_log_path = [string]$capabilityResult.buildspec_log_path - detected_labview_executable = [string]$capabilityResult.detected_labview_executable - detected_labview_year = [string]$capabilityResult.detected_labview_year - } + $pplCapabilityChecks[$bitness] = [ordered]@{ + status = [string]$capabilityResult.status + message = [string]$capabilityResult.message + runner_cli_path = [string]$capabilityResult.runner_cli_path + repo_path = [string]$capabilityResult.repo_path + required_labview_year = [string]$capabilityResult.required_labview_year + expected_execution_labview_year = [string]$capabilityResult.expected_execution_labview_year + required_bitness = [string]$capabilityResult.required_bitness + output_ppl_path = [string]$capabilityResult.output_ppl_path + 
output_ppl_snapshot_path = [string]$capabilityResult.output_ppl_snapshot_path + command = @($capabilityResult.command) + exit_code = $capabilityResult.exit_code + labview_install_root = [string]$capabilityResult.labview_install_root + labview_ini_path = [string]$capabilityResult.labview_ini_path + expected_labview_cli_port = $capabilityResult.expected_labview_cli_port + buildspec_log_path = [string]$capabilityResult.buildspec_log_path + detected_labview_executable = [string]$capabilityResult.detected_labview_executable + detected_labview_year = [string]$capabilityResult.detected_labview_year + } - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status ([string]$capabilityResult.status) -Message ([string]$capabilityResult.message) + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status ([string]$capabilityResult.status) -Message ([string]$capabilityResult.message) - if ([string]$capabilityResult.status -ne 'pass') { - $allPplPass = $false - $errors += "Runner CLI PPL capability check failed ($bitness-bit). $([string]$capabilityResult.message)" + if ([string]$capabilityResult.status -ne 'pass') { + $allPplPass = $false + $errors += "Runner CLI PPL capability check failed ($bitness-bit). $([string]$capabilityResult.message)" + } + } finally { + Write-InstallerFeedback -Message ("Running post-PPL LabVIEW close sweep after {0}-bit gate." -f $bitness) + Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath } - } finally { - Write-InstallerFeedback -Message ("Running post-PPL LabVIEW close sweep after {0}-bit gate." -f $bitness) - Invoke-PreVipLabVIEWCloseBestEffort -IconEditorRepoPath $iconEditorRepoPath } - } - if (-not $allPplPass) { - if ($skipVipHarness) { - $vipPackageBuildCheck.status = 'skipped' - $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." 
- Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message - } else { - $vipPackageBuildCheck.status = 'blocked' - $vipPackageBuildCheck.message = 'VIP harness was not run because one or more PPL capability checks failed.' - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message - } - } else { - if ($skipVipHarness) { - $vipPackageBuildCheck.status = 'skipped' - $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message + if (-not $allPplPass) { + if ($skipVipHarness) { + $vipPackageBuildCheck.status = 'skipped' + $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message + } else { + $vipPackageBuildCheck.status = 'blocked' + $vipPackageBuildCheck.message = 'VIP harness was not run because one or more PPL capability checks failed.' + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message + } } else { - Write-InstallerFeedback -Message 'Running runner-cli VI Package harness gate.' 
- $vipResult = Invoke-RunnerCliVipPackageHarnessCheck ` - -RunnerCliPath $runnerCliExePath ` - -PowerShellExecutable $runtimePowerShellExecutable ` - -IconEditorRepoPath $iconEditorRepoPath ` - -PinnedSha $iconEditorPinnedSha ` - -RequiredLabviewYear ([string]$requiredLabviewYear) ` - -RequiredBitness ([string]$requiredVipBitness) - - $vipPackageBuildCheck = [ordered]@{ - status = [string]$vipResult.status - message = [string]$vipResult.message - runner_cli_path = [string]$vipResult.runner_cli_path - repo_path = [string]$vipResult.repo_path - required_labview_year = [string]$vipResult.required_labview_year - required_bitness = [string]$vipResult.required_bitness - vipb_path = [string]$vipResult.vipb_path - vipc_path = [string]$vipResult.vipc_path - vipc_assert_output_path = [string]$vipResult.vipc_assert_output_path - vip_build_status_path = [string]$vipResult.vip_build_status_path - release_notes_path = [string]$vipResult.release_notes_path - display_information_path = [string]$vipResult.display_information_path - output_vip_path = [string]$vipResult.output_vip_path - command = [ordered]@{ - vipc_assert = @($vipResult.command.vipc_assert) - vipc_apply = @($vipResult.command.vipc_apply) - vip_build = @($vipResult.command.vip_build) + if ($skipVipHarness) { + $vipPackageBuildCheck.status = 'skipped' + $vipPackageBuildCheck.message = "VIP harness is skipped for execution profile '$executionProfile'." + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'skipped' -Message $vipPackageBuildCheck.message + } else { + Write-InstallerFeedback -Message 'Running runner-cli VI Package harness gate.' 
+ $vipResult = Invoke-RunnerCliVipPackageHarnessCheck ` + -RunnerCliPath $runnerCliExePath ` + -PowerShellExecutable $runtimePowerShellExecutable ` + -IconEditorRepoPath $iconEditorRepoPath ` + -PinnedSha $iconEditorPinnedSha ` + -RequiredLabviewYear ([string]$requiredLabviewYear) ` + -RequiredBitness ([string]$requiredVipBitness) + + $vipPackageBuildCheck = [ordered]@{ + status = [string]$vipResult.status + message = [string]$vipResult.message + runner_cli_path = [string]$vipResult.runner_cli_path + repo_path = [string]$vipResult.repo_path + required_labview_year = [string]$vipResult.required_labview_year + required_bitness = [string]$vipResult.required_bitness + vipb_path = [string]$vipResult.vipb_path + vipc_path = [string]$vipResult.vipc_path + vipc_assert_output_path = [string]$vipResult.vipc_assert_output_path + vip_build_status_path = [string]$vipResult.vip_build_status_path + release_notes_path = [string]$vipResult.release_notes_path + display_information_path = [string]$vipResult.display_information_path + output_vip_path = [string]$vipResult.output_vip_path + command = [ordered]@{ + vipc_assert = @($vipResult.command.vipc_assert) + vipc_apply = @($vipResult.command.vipc_apply) + vip_build = @($vipResult.command.vip_build) + } + exit_code = $vipResult.exit_code + labview_install_root = [string]$vipResult.labview_install_root } - exit_code = $vipResult.exit_code - labview_install_root = [string]$vipResult.labview_install_root - } - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status ([string]$vipResult.status) -Message ([string]$vipResult.message) + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status ([string]$vipResult.status) -Message ([string]$vipResult.message) - if ($vipPackageBuildCheck.status -ne 'pass') { - $errors += "Runner CLI VIP harness check failed. 
$($vipPackageBuildCheck.message)" + if ($vipPackageBuildCheck.status -ne 'pass') { + $errors += "Runner CLI VIP harness check failed. $($vipPackageBuildCheck.message)" + } } } } - } - } else { - $bundleBlockingMessage = "Runner CLI bundle verification failed; capability gates were not run. $([string]$runnerCliBundle.message)" - foreach ($bitness in $requiredPplBitnesses) { - $pplCapabilityChecks[$bitness].status = 'blocked' - $pplCapabilityChecks[$bitness].message = $bundleBlockingMessage - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status 'blocked' -Message $bundleBlockingMessage - } - $vipPackageBuildCheck.status = 'blocked' - $vipPackageBuildCheck.message = 'VIP harness was not run because runner-cli bundle verification failed.' - Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message - } - } finally { - if ($worktreeRootOverridden) { - if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { - Remove-Item Env:LVIE_WORKTREE_ROOT -ErrorAction SilentlyContinue } else { - $env:LVIE_WORKTREE_ROOT = $originalWorktreeRoot + $bundleBlockingMessage = "Runner CLI bundle verification failed; capability gates were not run. $([string]$runnerCliBundle.message)" + foreach ($bitness in $requiredPplBitnesses) { + $pplCapabilityChecks[$bitness].status = 'blocked' + $pplCapabilityChecks[$bitness].message = $bundleBlockingMessage + Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'ppl-build' -Bitness $bitness -Status 'blocked' -Message $bundleBlockingMessage + } + $vipPackageBuildCheck.status = 'blocked' + $vipPackageBuildCheck.message = 'VIP harness was not run because runner-cli bundle verification failed.' 
+ Add-PostActionSequenceEntry -Sequence $postActionSequence -Phase 'vip-harness' -Bitness $requiredVipBitness -Status 'blocked' -Message $vipPackageBuildCheck.message + } + } finally { + if ($worktreeRootOverridden) { + if ([string]::IsNullOrWhiteSpace($originalWorktreeRoot)) { + Remove-Item Env:LVIE_WORKTREE_ROOT -ErrorAction SilentlyContinue + } else { + $env:LVIE_WORKTREE_ROOT = $originalWorktreeRoot + } } } } diff --git a/scripts/Invoke-LinuxContainerNsisParity.ps1 b/scripts/Invoke-LinuxContainerNsisParity.ps1 new file mode 100644 index 0000000..94230d8 --- /dev/null +++ b/scripts/Invoke-LinuxContainerNsisParity.ps1 @@ -0,0 +1,275 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [string]$Image = 'labview-cdev-surface-nsis-linux-parity:local', + + [Parameter()] + [switch]$BuildLocalImage, + + [Parameter()] + [string]$DockerfilePath = (Join-Path (Split-Path -Parent $PSScriptRoot) 'tools\nsis-selftest-linux\Dockerfile'), + + [Parameter()] + [string]$DockerContext = 'desktop-linux', + + [Parameter()] + [string]$OutputRoot = (Join-Path (Split-Path -Parent $PSScriptRoot) 'artifacts\release\linux-container-nsis-parity'), + + [Parameter()] + [string]$ContainerRepoMount = '/repo', + + [Parameter()] + [string]$ContainerOutputMount = '/hostout', + + [Parameter()] + [ValidatePattern('^[a-z0-9][a-z0-9_.-]{2,50}$')] + [string]$ContainerNamePrefix = 'lvie-cdev-linux-nsis', + + [Parameter()] + [switch]$KeepContainerScript +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +function Ensure-Directory { + param([Parameter(Mandatory = $true)][string]$Path) + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Assert-Command { + param([Parameter(Mandatory = $true)][string]$Name) + if (-not (Get-Command $Name -ErrorAction SilentlyContinue)) { + throw "Required command '$Name' was not found on PATH." 
+ } +} + +function Get-CommandOutputOrThrow { + param( + [Parameter(Mandatory = $true)][string]$Command, + [Parameter(Mandatory = $true)][string[]]$Arguments + ) + + & $Command @Arguments + if ($LASTEXITCODE -ne 0) { + throw ("Command failed: {0} {1} (exit={2})" -f $Command, ($Arguments -join ' '), $LASTEXITCODE) + } +} + +$repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path +$resolvedDockerfilePath = [System.IO.Path]::GetFullPath($DockerfilePath) +$dockerBuildContext = [System.IO.Path]::GetDirectoryName($resolvedDockerfilePath) +$resolvedOutputRoot = [System.IO.Path]::GetFullPath($OutputRoot) +$containerScriptPath = Join-Path $resolvedOutputRoot 'container-run.sh' +$containerReportPath = Join-Path $resolvedOutputRoot 'container-report.json' +$hostReportPath = Join-Path $resolvedOutputRoot 'linux-container-nsis-parity-report.json' + +Assert-Command -Name 'docker' + +if (-not (Test-Path -LiteralPath $resolvedDockerfilePath -PathType Leaf)) { + throw "Dockerfile not found: $resolvedDockerfilePath" +} + +Ensure-Directory -Path $resolvedOutputRoot +if (Test-Path -LiteralPath $containerReportPath -PathType Leaf) { + Remove-Item -LiteralPath $containerReportPath -Force +} + +$contextArgs = @() +if (-not [string]::IsNullOrWhiteSpace($DockerContext)) { + $contextArgs += @('--context', $DockerContext) +} + +Get-CommandOutputOrThrow -Command 'docker' -Arguments @($contextArgs + @('info')) + +if ($BuildLocalImage) { + $buildArgs = @($contextArgs + @('build', '-f', $resolvedDockerfilePath, '-t', $Image, $dockerBuildContext)) + Get-CommandOutputOrThrow -Command 'docker' -Arguments $buildArgs +} + +$containerScriptContent = @' +#!/usr/bin/env bash +set -euo pipefail + +repo_root="__REPO_MOUNT__" +host_out="__OUTPUT_MOUNT__" +work_root="/tmp/nsis-linux-parity" +smoke_nsi="$work_root/nsis-smoke.nsi" +smoke_installer="$host_out/nsis-linux-parity-smoke.exe" +makensis_log="$host_out/makensis-linux-parity.log" + +mkdir -p "$host_out" +rm -rf "$work_root" +mkdir -p 
"$work_root" + +status="succeeded" +reason_code="" +compile_status="not_run" +smoke_sha256="" +missing_commands=() + +# LabVIEWCLI binary casing varies by image surface; support both deterministic probes. +if ! command -v "labviewcli" >/dev/null 2>&1 && ! command -v "LabVIEWCLI" >/dev/null 2>&1; then + missing_commands+=("LabVIEWCLI") +fi + +for command_name in makensis git dotnet pwsh; do + if ! command -v "$command_name" >/dev/null 2>&1; then + missing_commands+=("$command_name") + fi +done + +if [[ ${#missing_commands[@]} -gt 0 ]]; then + status="failed" + reason_code="toolchain_missing" +fi +# Unquoted heredoc on purpose: OutFile must track $smoke_installer (derived from the output mount) rather than a hard-coded /hostout path. +cat > "$smoke_nsi" <<NSIS +Unicode True +OutFile "$smoke_installer" +Section +SectionEnd +NSIS + +if [[ "$status" == "succeeded" ]]; then + if makensis -V2 "$smoke_nsi" >"$makensis_log" 2>&1; then + compile_status="pass" + else + compile_status="fail" + status="failed" + reason_code="nsis_compile_failed" + fi +fi + +if [[ -f "$smoke_installer" ]]; then + smoke_sha256="$(sha256sum "$smoke_installer" | awk '{print $1}')" +fi + +missing_csv="" +if [[ ${#missing_commands[@]} -gt 0 ]]; then + missing_csv="$(IFS=,; echo "${missing_commands[*]}")" +fi + +jq -n \ + --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ + --arg status "$status" \ + --arg reason_code "$reason_code" \ + --arg missing_csv "$missing_csv" \ + --arg compile_status "$compile_status" \ + --arg makensis_log "$makensis_log" \ + --arg smoke_installer "$smoke_installer" \ + --arg smoke_sha256 "$smoke_sha256" \ + --arg installer_execution_status "skipped" \ + --arg installer_execution_reason "windows_installer_not_executable_on_linux" \ + '{ + timestamp_utc: $timestamp, + status: $status, + reason_code: $reason_code, + missing_commands: (if $missing_csv == "" then [] else ($missing_csv | split(",")) end), + nsis_compile_status: $compile_status, + makensis_log_path: $makensis_log, + smoke_installer_path: $smoke_installer, + smoke_installer_sha256: $smoke_sha256, + installer_execution_status:
$installer_execution_status, + installer_execution_reason: $installer_execution_reason + }' > "$host_out/container-report.json" + +if [[ "$status" != "succeeded" ]]; then + exit 1 +fi + +exit 0 +'@ + +$containerScriptContent = $containerScriptContent.Replace('__REPO_MOUNT__', $ContainerRepoMount) +$containerScriptContent = $containerScriptContent.Replace('__OUTPUT_MOUNT__', $ContainerOutputMount) +$containerScriptContent = $containerScriptContent -replace "`r`n", "`n" +[System.IO.File]::WriteAllText($containerScriptPath, $containerScriptContent, [System.Text.UTF8Encoding]::new($false)) + +$containerName = ('{0}-{1}' -f $ContainerNamePrefix, ([guid]::NewGuid().ToString('n').Substring(0, 12))).ToLowerInvariant() +$dockerRepoVolume = ('{0}:{1}' -f $repoRoot, $ContainerRepoMount) +$dockerOutputVolume = ('{0}:{1}' -f $resolvedOutputRoot, $ContainerOutputMount) +$containerScriptInContainer = if ($ContainerOutputMount.EndsWith('/')) { + "$ContainerOutputMount" + 'container-run.sh' +} else { + "$ContainerOutputMount/container-run.sh" +} +$containerExitCode = 0 +$status = 'unknown' +$errors = @() +$containerReport = $null +$startedUtc = (Get-Date).ToUniversalTime() + +try { + $runArgs = @($contextArgs + @( + 'run', + '--rm', + '--name', $containerName, + '-v', $dockerRepoVolume, + '-v', $dockerOutputVolume, + $Image, + 'bash', $containerScriptInContainer + )) + + & docker @runArgs + $containerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($containerExitCode -ne 0) { + throw "docker run failed with exit code $containerExitCode" + } + + if (-not (Test-Path -LiteralPath $containerReportPath -PathType Leaf)) { + throw "Container report missing: $containerReportPath" + } + + $containerReport = Get-Content -LiteralPath $containerReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + if ([string]$containerReport.status -ne 'succeeded') { + throw ("Container report status is '{0}' (expected 'succeeded')." 
-f [string]$containerReport.status) + } + + $status = 'succeeded' +} catch { + if ($containerExitCode -eq 0) { + $containerExitCode = 1 + } + $status = 'failed' + $errors += $_.Exception.Message + if (Test-Path -LiteralPath $containerReportPath -PathType Leaf) { + try { + $containerReport = Get-Content -LiteralPath $containerReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } catch { + $errors += "Failed to parse container report JSON: $($_.Exception.Message)" + } + } +} + +$endedUtc = (Get-Date).ToUniversalTime() +[ordered]@{ + timestamp_utc = $endedUtc.ToString('o') + started_utc = $startedUtc.ToString('o') + status = $status + image = $Image + build_local_image = [bool]$BuildLocalImage + dockerfile = $resolvedDockerfilePath + docker_context = $DockerContext + container_name = $containerName + output_root = $resolvedOutputRoot + container_exit_code = $containerExitCode + container_report_path = $containerReportPath + container_report = $containerReport + errors = $errors +} | ConvertTo-Json -Depth 12 | Set-Content -LiteralPath $hostReportPath -Encoding utf8 + +if (-not $KeepContainerScript -and (Test-Path -LiteralPath $containerScriptPath -PathType Leaf)) { + Remove-Item -LiteralPath $containerScriptPath -Force +} + +Write-Host "Linux container NSIS parity report: $hostReportPath" + +if ($status -ne 'succeeded') { + exit 1 +} + +exit 0 diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 new file mode 100644 index 0000000..d9b8dc0 --- /dev/null +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -0,0 +1,410 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [string]$Image = 'labview-cdev-surface-nsis-selftest:local', + + [Parameter()] + [switch]$BuildLocalImage, + + [Parameter()] + [string]$DockerfilePath = (Join-Path (Split-Path -Parent $PSScriptRoot) 'tools\nsis-selftest-windows\Dockerfile'), + + [Parameter()] + [string]$DockerContext = '', + + [Parameter()] + 
[string]$OutputRoot = (Join-Path (Split-Path -Parent $PSScriptRoot) 'artifacts\release\windows-container-nsis-selftest'), + + [Parameter()] + [string]$HostNsisRoot = 'C:\Program Files (x86)\NSIS', + + [Parameter()] + [string]$ContainerWorkspaceRoot = 'C:\dev-smoke-lvie', + + [Parameter()] + [string]$ContainerRepoMount = 'C:\repo', + + [Parameter()] + [string]$ContainerOutputMount = 'C:\hostout', + + [Parameter()] + [string]$ContainerPayloadMount = 'C:\payload', + + [Parameter()] + [string]$ContainerNsisMount = 'C:\nsis', + + [Parameter()] + [ValidatePattern('^[a-z0-9][a-z0-9_.-]{2,50}$')] + [string]$ContainerNamePrefix = 'lvie-cdev-nsis-smoke', + + [Parameter()] + [switch]$KeepContainerScript, + + [Parameter()] + [switch]$KeepContainerOnFailure +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +function Ensure-Directory { + param([Parameter(Mandatory = $true)][string]$Path) + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Assert-Command { + param([Parameter(Mandatory = $true)][string]$Name) + if (-not (Get-Command $Name -ErrorAction SilentlyContinue)) { + throw "Required command '$Name' was not found on PATH." 
+ } +} + +function Get-CommandOutputOrThrow { + param( + [Parameter(Mandatory = $true)][string]$Command, + [Parameter(Mandatory = $true)][string[]]$Arguments + ) + + & $Command @Arguments + if ($LASTEXITCODE -ne 0) { + throw ("Command failed: {0} {1} (exit={2})" -f $Command, ($Arguments -join ' '), $LASTEXITCODE) + } +} + +function Convert-ToSingleQuotedLiteral { + param([Parameter(Mandatory = $true)][string]$Value) + return $Value.Replace("'", "''") +} + +$repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path +$resolvedDockerfilePath = [System.IO.Path]::GetFullPath($DockerfilePath) +$dockerBuildContext = [System.IO.Path]::GetDirectoryName($resolvedDockerfilePath) +$resolvedOutputRoot = [System.IO.Path]::GetFullPath($OutputRoot) +$resolvedHostNsisRoot = [System.IO.Path]::GetFullPath($HostNsisRoot) +$hostMakensisPath = Join-Path $resolvedHostNsisRoot 'makensis.exe' +$containerScriptPath = Join-Path $resolvedOutputRoot 'container-run.ps1' +$containerReportPath = Join-Path $resolvedOutputRoot 'container-report.json' +$hostReportPath = Join-Path $resolvedOutputRoot 'windows-container-nsis-selftest-report.json' +$hostPayloadRoot = Join-Path $resolvedOutputRoot 'payload-host' +$hostPayloadManifestPath = Join-Path $hostPayloadRoot 'workspace-governance\workspace-governance.json' +$hostRunnerCliOutputRoot = Join-Path $hostPayloadRoot 'tools\runner-cli\win-x64' +$buildInstallerScript = Join-Path $repoRoot 'scripts\Build-WorkspaceBootstrapInstaller.ps1' +$buildRunnerCliScript = Join-Path $repoRoot 'scripts\Build-RunnerCliBundleFromManifest.ps1' +$convertManifestScript = Join-Path $repoRoot 'scripts\Convert-ManifestToWorkspace.ps1' +$installScript = Join-Path $repoRoot 'scripts\Install-WorkspaceFromManifest.ps1' +$canonicalPayloadRoot = Join-Path $repoRoot 'workspace-governance-payload' + +Assert-Command -Name 'docker' +Assert-Command -Name 'powershell' +Assert-Command -Name 'git' +Assert-Command -Name 'dotnet' + +if (-not (Test-Path -LiteralPath 
$resolvedDockerfilePath -PathType Leaf)) { + throw "Dockerfile not found: $resolvedDockerfilePath" +} +foreach ($requiredPath in @($buildInstallerScript, $buildRunnerCliScript, $convertManifestScript, $installScript)) { + if (-not (Test-Path -LiteralPath $requiredPath -PathType Leaf)) { + throw "Required script not found: $requiredPath" + } +} +if (-not (Test-Path -LiteralPath $canonicalPayloadRoot -PathType Container)) { + throw "Canonical payload root not found: $canonicalPayloadRoot" +} +if (-not (Test-Path -LiteralPath $hostMakensisPath -PathType Leaf)) { + throw ("host_nsis_missing: expected '{0}'. Install NSIS on host or pass -HostNsisRoot." -f $hostMakensisPath) +} + +Ensure-Directory -Path $resolvedOutputRoot +if (Test-Path -LiteralPath $containerReportPath -PathType Leaf) { + Remove-Item -LiteralPath $containerReportPath -Force +} +if (Test-Path -LiteralPath $hostPayloadRoot -PathType Container) { + Remove-Item -LiteralPath $hostPayloadRoot -Recurse -Force +} +Ensure-Directory -Path $hostPayloadRoot + +Copy-Item -Path (Join-Path $canonicalPayloadRoot '*') -Destination $hostPayloadRoot -Recurse -Force +Ensure-Directory -Path (Join-Path $hostPayloadRoot 'scripts') +Ensure-Directory -Path $hostRunnerCliOutputRoot +Copy-Item -LiteralPath $installScript -Destination (Join-Path $hostPayloadRoot 'scripts\Install-WorkspaceFromManifest.ps1') -Force + +& $buildRunnerCliScript ` + -ManifestPath $hostPayloadManifestPath ` + -OutputRoot $hostRunnerCliOutputRoot ` + -RepoName 'labview-icon-editor' ` + -Runtime 'win-x64' ` + -Deterministic:$true | Out-Host +if ($LASTEXITCODE -ne 0) { + throw "Build-RunnerCliBundleFromManifest.ps1 failed with exit code $LASTEXITCODE" +} + +& $convertManifestScript -ManifestPath $hostPayloadManifestPath -WorkspaceRoot $ContainerWorkspaceRoot | Out-Null +if ($LASTEXITCODE -ne 0) { + throw "Convert-ManifestToWorkspace.ps1 failed with exit code $LASTEXITCODE" +} + +$contextArgs = @() +if (-not [string]::IsNullOrWhiteSpace($DockerContext)) { + 
$contextArgs += @('--context', $DockerContext) +} + +$dockerOsTypeRaw = & docker @($contextArgs + @('info', '--format', '{{.OSType}}')) 2>$null +if ($LASTEXITCODE -ne 0) { + throw ("docker_info_failed: unable to query Docker engine OSType for context '{0}'." -f $DockerContext) +} +$dockerOsType = ([string]$dockerOsTypeRaw).Trim().ToLowerInvariant() +if ($dockerOsType -ne 'windows') { + throw ("windows_container_mode_required: Docker engine OSType is '{0}' for context '{1}'. Switch Docker Desktop to Windows containers or use a Windows-engine context." -f $dockerOsType, $DockerContext) +} + +if ($BuildLocalImage) { + $buildArgs = @($contextArgs + @('build', '-f', $resolvedDockerfilePath, '-t', $Image, $dockerBuildContext)) + Get-CommandOutputOrThrow -Command 'docker' -Arguments $buildArgs +} + +$containerScriptTemplate = @' +[CmdletBinding()] +param() + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +function Ensure-Directory { + param([Parameter(Mandatory = $true)][string]$Path) + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Assert-Command { + param([Parameter(Mandatory = $true)][string]$Name) + if (-not (Get-Command $Name -ErrorAction SilentlyContinue)) { + throw "Required command '$Name' was not found on PATH in container." 
+ } +} + +$repoRoot = '__REPO_MOUNT__' +$payloadRoot = '__PAYLOAD_MOUNT__' +$nsisRoot = '__NSIS_MOUNT__' +$hostOut = '__OUTPUT_MOUNT__' +$workspaceRoot = '__WORKSPACE_ROOT__' +$workRoot = 'C:\workspace\nsis-selftest' +$installerPath = Join-Path $workRoot 'lvie-cdev-workspace-installer-container-smoke.exe' +$installReportPath = Join-Path $workspaceRoot 'artifacts\workspace-install-latest.json' +$launchLogPath = Join-Path $workspaceRoot 'artifacts\workspace-installer-launch.log' +$buildInstallerScript = Join-Path $repoRoot 'scripts\Build-WorkspaceBootstrapInstaller.ps1' +$containerMakensisPath = Join-Path $nsisRoot 'makensis.exe' + +$containerStatus = 'unknown' +$errorMessage = '' +$reasonCode = '' +$installerSha256 = '' +$installerExitCode = 0 +$installReportStatus = '' +$installReportErrors = @() +$installReportWarnings = @() + +try { + Assert-Command -Name 'powershell' + if (-not (Test-Path -LiteralPath $buildInstallerScript -PathType Leaf)) { + throw "Required build script missing in mounted repo: $buildInstallerScript" + } + if (-not (Test-Path -LiteralPath $payloadRoot -PathType Container)) { + throw "Mounted payload root not found: $payloadRoot" + } + if (-not (Test-Path -LiteralPath $containerMakensisPath -PathType Leaf)) { + throw "Mounted NSIS binary not found: $containerMakensisPath" + } + + if (Test-Path -LiteralPath $workRoot -PathType Container) { + Remove-Item -LiteralPath $workRoot -Recurse -Force + } + if (Test-Path -LiteralPath $workspaceRoot -PathType Container) { + Remove-Item -LiteralPath $workspaceRoot -Recurse -Force + } + Ensure-Directory -Path $workRoot + Ensure-Directory -Path $workspaceRoot + + & powershell -NoProfile -ExecutionPolicy Bypass -File $buildInstallerScript ` + -PayloadRoot $payloadRoot ` + -OutputPath $installerPath ` + -WorkspaceRootDefault $workspaceRoot ` + -InstallerExecutionContext 'ContainerSmoke' ` + -NsisRoot $nsisRoot ` + -Deterministic:$true | Out-Host + if ($LASTEXITCODE -ne 0) { + throw 
"Build-WorkspaceBootstrapInstaller.ps1 failed in container with exit code $LASTEXITCODE" + } + if (-not (Test-Path -LiteralPath $installerPath -PathType Leaf)) { + throw "Installer output not found: $installerPath" + } + + $installerSha256 = (Get-FileHash -LiteralPath $installerPath -Algorithm SHA256).Hash.ToLowerInvariant() + "{0} *{1}" -f $installerSha256, (Split-Path -Path $installerPath -Leaf) | Set-Content -LiteralPath "$installerPath.sha256" -Encoding ascii + + & $installerPath '/S' | Out-Host + $installerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($installerExitCode -ne 0) { + $reasonCode = 'installer_exit_nonzero' + throw "Installer failed in container with exit code $installerExitCode" + } + + if (-not (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { + $reasonCode = 'install_report_missing' + throw "Install report not found after container smoke install: $installReportPath" + } + + $installReport = Get-Content -LiteralPath $installReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $installReportStatus = [string]$installReport.status + $installReportErrors = @($installReport.errors) + $installReportWarnings = @($installReport.warnings) + if ($installReportStatus -ne 'succeeded') { + $reasonCode = 'install_report_failed' + throw "Install report status is '$installReportStatus' (expected 'succeeded')." 
+ } + + Copy-Item -LiteralPath $installerPath -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe') -Force + Copy-Item -LiteralPath "$installerPath.sha256" -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe.sha256') -Force + Copy-Item -LiteralPath $installReportPath -Destination (Join-Path $hostOut 'workspace-install-latest.container-smoke.json') -Force + if (Test-Path -LiteralPath $launchLogPath -PathType Leaf) { + Copy-Item -LiteralPath $launchLogPath -Destination (Join-Path $hostOut 'workspace-installer-launch.container-smoke.log') -Force + } + + $containerStatus = 'succeeded' +} catch { + if ([string]::IsNullOrWhiteSpace($reasonCode)) { + $reasonCode = 'container_smoke_failed' + } + $containerStatus = 'failed' + $errorMessage = $_.Exception.Message +} + +[ordered]@{ + timestamp_utc = (Get-Date).ToUniversalTime().ToString('o') + status = $containerStatus + reason_code = $reasonCode + repo_root = $repoRoot + payload_root = $payloadRoot + nsis_root = $nsisRoot + host_output = $hostOut + workspace_root = $workspaceRoot + work_root = $workRoot + installer_path = $installerPath + installer_sha256 = $installerSha256 + installer_exit_code = $installerExitCode + install_report_path = $installReportPath + install_report_status = $installReportStatus + install_report_errors = @($installReportErrors) + install_report_warnings = @($installReportWarnings) + launch_log_path = $launchLogPath + error_message = $errorMessage +} | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath (Join-Path $hostOut 'container-report.json') -Encoding utf8 + +if ($containerStatus -ne 'succeeded') { + exit 1 +} +exit 0 +'@ + +$containerScriptContent = $containerScriptTemplate +$containerScriptContent = $containerScriptContent.Replace('__REPO_MOUNT__', (Convert-ToSingleQuotedLiteral -Value $ContainerRepoMount)) +$containerScriptContent = $containerScriptContent.Replace('__PAYLOAD_MOUNT__', (Convert-ToSingleQuotedLiteral -Value 
$ContainerPayloadMount)) +$containerScriptContent = $containerScriptContent.Replace('__NSIS_MOUNT__', (Convert-ToSingleQuotedLiteral -Value $ContainerNsisMount)) +$containerScriptContent = $containerScriptContent.Replace('__OUTPUT_MOUNT__', (Convert-ToSingleQuotedLiteral -Value $ContainerOutputMount)) +$containerScriptContent = $containerScriptContent.Replace('__WORKSPACE_ROOT__', (Convert-ToSingleQuotedLiteral -Value $ContainerWorkspaceRoot)) +Set-Content -LiteralPath $containerScriptPath -Value $containerScriptContent -Encoding utf8BOM <# BOM required: the container runs this file with Windows PowerShell, which reads BOM-less files as ANSI #> + +$containerName = ('{0}-{1}' -f $ContainerNamePrefix, ([guid]::NewGuid().ToString('n').Substring(0, 12))).ToLowerInvariant() +$dockerRepoVolume = ('{0}:{1}' -f $repoRoot, $ContainerRepoMount) +$dockerOutputVolume = ('{0}:{1}' -f $resolvedOutputRoot, $ContainerOutputMount) +$dockerPayloadVolume = ('{0}:{1}' -f $hostPayloadRoot, $ContainerPayloadMount) +$dockerNsisVolume = ('{0}:{1}' -f $resolvedHostNsisRoot, $ContainerNsisMount) +$containerExitCode = 0 +$status = 'unknown' +$errors = @() +$containerReport = $null +$startedUtc = (Get-Date).ToUniversalTime() +$removeOnExit = -not $KeepContainerOnFailure + +try { + $runArgs = @($contextArgs + @('run')) + if ($removeOnExit) { + $runArgs += '--rm' + } + $runArgs += @( + '--name', $containerName, + '-v', $dockerRepoVolume, + '-v', $dockerOutputVolume, + '-v', $dockerPayloadVolume, + '-v', $dockerNsisVolume, + $Image, + 'powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', (Join-Path $ContainerOutputMount 'container-run.ps1') + ) + + & docker @runArgs + $containerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($containerExitCode -ne 0) { + throw "docker run failed with exit code $containerExitCode" + } + if (-not (Test-Path -LiteralPath $containerReportPath -PathType Leaf)) { + throw "Container report missing: $containerReportPath" + } + + $containerReport = Get-Content -LiteralPath $containerReportPath -Raw | ConvertFrom-Json -ErrorAction Stop +
if ([string]$containerReport.status -ne 'succeeded') { + throw ("Container report status is '{0}' (expected 'succeeded')." -f [string]$containerReport.status) + } + + $status = 'succeeded' +} catch { + if ($containerExitCode -eq 0) { + $containerExitCode = 1 + } + $status = 'failed' + $errors += $_.Exception.Message + if (Test-Path -LiteralPath $containerReportPath -PathType Leaf) { + try { + $containerReport = Get-Content -LiteralPath $containerReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } catch { + $errors += "Failed to parse container report JSON: $($_.Exception.Message)" + } + } +} finally { + if ($KeepContainerOnFailure -and $status -eq 'succeeded') { + & docker @($contextArgs + @('rm', '-f', $containerName)) *> $null + } +} + +$endedUtc = (Get-Date).ToUniversalTime() +[ordered]@{ + timestamp_utc = $endedUtc.ToString('o') + started_utc = $startedUtc.ToString('o') + status = $status + image = $Image + build_local_image = [bool]$BuildLocalImage + dockerfile = $resolvedDockerfilePath + docker_context = $DockerContext + container_name = $containerName + output_root = $resolvedOutputRoot + host_payload_root = $hostPayloadRoot + host_nsis_root = $resolvedHostNsisRoot + container_workspace_root = $ContainerWorkspaceRoot + container_exit_code = $containerExitCode + container_report_path = $containerReportPath + container_report = $containerReport + errors = $errors +} | ConvertTo-Json -Depth 12 | Set-Content -LiteralPath $hostReportPath -Encoding utf8 + +if (-not $KeepContainerScript -and (Test-Path -LiteralPath $containerScriptPath -PathType Leaf)) { + Remove-Item -LiteralPath $containerScriptPath -Force +} + +Write-Host "Windows container NSIS self-test report: $hostReportPath" + +if ($status -ne 'succeeded') { + exit 1 +} +exit 0 diff --git a/tests/Build-WorkspaceBootstrapInstaller.Tests.ps1 b/tests/Build-WorkspaceBootstrapInstaller.Tests.ps1 index 3406c46..ba5f150 100644 --- a/tests/Build-WorkspaceBootstrapInstaller.Tests.ps1 +++ 
b/tests/Build-WorkspaceBootstrapInstaller.Tests.ps1 @@ -41,6 +41,8 @@ Describe 'Build-WorkspaceBootstrapInstaller script' { It 'supports deterministic compare mode parameters' { $scriptContent = Get-Content -Path $script:scriptPath -Raw + $scriptContent | Should -Match '\[ValidateSet\(''NsisInstall'', ''LocalInstallerExercise'', ''ContainerSmoke''\)\]' + $scriptContent | Should -Match '/DINSTALL_EXEC_CONTEXT=' $scriptContent | Should -Match '\[bool\]\$Deterministic = \$true' $scriptContent | Should -Match '\[long\]\$SourceDateEpoch' $scriptContent | Should -Match '\[switch\]\$VerifyDeterminism' diff --git a/tests/LinuxContainerNsisParityContract.Tests.ps1 b/tests/LinuxContainerNsisParityContract.Tests.ps1 new file mode 100644 index 0000000..dd54c48 --- /dev/null +++ b/tests/LinuxContainerNsisParityContract.Tests.ps1 @@ -0,0 +1,53 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Linux container NSIS parity contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:parityScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-LinuxContainerNsisParity.ps1' + $script:dockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-linux/Dockerfile' + + if (-not (Test-Path -LiteralPath $script:parityScriptPath -PathType Leaf)) { + throw "Linux NSIS parity script missing: $script:parityScriptPath" + } + if (-not (Test-Path -LiteralPath $script:dockerfilePath -PathType Leaf)) { + throw "Linux NSIS parity Dockerfile missing: $script:dockerfilePath" + } + + $script:parityScriptContent = Get-Content -LiteralPath $script:parityScriptPath -Raw + $script:dockerfileContent = Get-Content -LiteralPath $script:dockerfilePath -Raw + } + + It 'runs a Linux parity container flow against desktop-linux context' { + $script:parityScriptContent | Should -Match '\[string\]\$DockerContext\s*=\s*''desktop-linux''' + $script:parityScriptContent | Should -Match 'docker run' + 
$script:parityScriptContent | Should -Match 'container-report\.json' + $script:parityScriptContent | Should -Match 'linux-container-nsis-parity-report\.json' + $script:parityScriptContent | Should -Match 'labviewcli' + $script:parityScriptContent | Should -Match 'LabVIEWCLI' + $script:parityScriptContent | Should -Match 'makensis' + $script:parityScriptContent | Should -Match 'windows_installer_not_executable_on_linux' + } + + It 'defines deterministic linux parity image dependencies' { + $script:dockerfileContent | Should -Match 'FROM nationalinstruments/labview:2026q1-linux' + $script:dockerfileContent | Should -Match 'apt-get install -y --no-install-recommends' + $script:dockerfileContent | Should -Match 'packages\.microsoft\.com/ubuntu/22\.04/prod' + $script:dockerfileContent | Should -Match 'dotnet-sdk-8\.0' + $script:dockerfileContent | Should -Match 'powershell' + $script:dockerfileContent | Should -Match 'nsis' + $script:dockerfileContent | Should -Match 'git' + $script:dockerfileContent | Should -Match 'jq' + $script:dockerfileContent | Should -Match 'gtk-update-icon-cache' + $script:dockerfileContent | Should -Match 'desktop-file-utils' + } + + It 'has parse-safe PowerShell syntax' { + $tokens = $null + $errors = $null + [void][System.Management.Automation.Language.Parser]::ParseInput($script:parityScriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1 b/tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..a283935 --- /dev/null +++ b/tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1 @@ -0,0 +1,38 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Linux NSIS parity image publish workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot 
'.github/workflows/publish-linux-nsis-parity-image.yml' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Linux NSIS parity image publish workflow missing: $script:workflowPath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + } + + It 'supports manual dispatch and deterministic main-path publish triggers' { + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'tools/nsis-selftest-linux/Dockerfile' + $script:workflowContent | Should -Match 'scripts/Invoke-LinuxContainerNsisParity\.ps1' + } + + It 'publishes to GHCR with package write permission' { + $script:workflowContent | Should -Match 'packages:\s*write' + $script:workflowContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-surface-nsis-linux-parity' + $script:workflowContent | Should -Match 'docker/login-action@v3' + $script:workflowContent | Should -Match 'docker/build-push-action@v6' + } + + It 'derives immutable tags and reports pushed digest' { + $script:workflowContent | Should -Match 'sha-\$\{short_sha\}' + $script:workflowContent | Should -Match 'BASE_TAG:\s*2026q1-linux' + $script:workflowContent | Should -Match '\$\{BASE_TAG\}-\$\{date_utc\}' + $script:workflowContent | Should -Match 'steps\.build\.outputs\.digest' + } +} diff --git a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 new file mode 100644 index 0000000..a519fd5 --- /dev/null +++ b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 @@ -0,0 +1,62 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Windows container NSIS self-test contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:selfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' + 
$script:dockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' + + if (-not (Test-Path -LiteralPath $script:selfTestScriptPath -PathType Leaf)) { + throw "Windows container self-test script missing: $script:selfTestScriptPath" + } + if (-not (Test-Path -LiteralPath $script:dockerfilePath -PathType Leaf)) { + throw "Windows self-test Dockerfile missing: $script:dockerfilePath" + } + + $script:selfTestScriptContent = Get-Content -LiteralPath $script:selfTestScriptPath -Raw + $script:dockerfileContent = Get-Content -LiteralPath $script:dockerfilePath -Raw + } + + It 'builds and runs a Windows containerized NSIS smoke install flow' { + $script:selfTestScriptContent | Should -Match '''build'', ''-f''' + $script:selfTestScriptContent | Should -Match '''run''' + $script:selfTestScriptContent | Should -Match '& docker @runArgs' + $script:selfTestScriptContent | Should -Match '''--format'', ''\{\{\.OSType\}\}''' + $script:selfTestScriptContent | Should -Match 'windows_container_mode_required' + $script:selfTestScriptContent | Should -Match 'Build-RunnerCliBundleFromManifest\.ps1' + $script:selfTestScriptContent | Should -Match 'Build-WorkspaceBootstrapInstaller\.ps1' + $script:selfTestScriptContent | Should -Match 'Convert-ManifestToWorkspace\.ps1' + $script:selfTestScriptContent | Should -Match '-InstallerExecutionContext ''ContainerSmoke''' + $script:selfTestScriptContent | Should -Match '''/S''' + $script:selfTestScriptContent | Should -Match 'workspace-install-latest\.json' + $script:selfTestScriptContent | Should -Match 'container-report\.json' + $script:selfTestScriptContent | Should -Match 'windows-container-nsis-selftest-report\.json' + } + + It 'supports deterministic troubleshooting controls for container lifecycle' { + $script:selfTestScriptContent | Should -Match '\[switch\]\$BuildLocalImage' + $script:selfTestScriptContent | Should -Match '\[switch\]\$KeepContainerScript' + $script:selfTestScriptContent | Should -Match 
'\[switch\]\$KeepContainerOnFailure' + $script:selfTestScriptContent | Should -Match '\$ContainerNamePrefix' + $script:selfTestScriptContent | Should -Match '\$DockerContext' + } + + It 'pins a Windows base image aligned to 2026q1 with minimal runtime surface' { + $script:dockerfileContent | Should -Match 'nationalinstruments/labview:2026q1-windows' + $script:dockerfileContent | Should -Match 'SHELL \["powershell"' + $script:dockerfileContent | Should -Match 'WORKDIR C:\\workspace' + $script:dockerfileContent | Should -Not -Match 'dotnet-install\.ps1' + $script:dockerfileContent | Should -Not -Match 'MinGit-' + $script:dockerfileContent | Should -Not -Match 'nsis-' + } + + It 'has parse-safe PowerShell syntax' { + $tokens = $null + $errors = $null + [void][System.Management.Automation.Language.Parser]::ParseInput($script:selfTestScriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/WorkspaceInstallRuntimeContract.Tests.ps1 b/tests/WorkspaceInstallRuntimeContract.Tests.ps1 index fd52b60..ef2d27e 100644 --- a/tests/WorkspaceInstallRuntimeContract.Tests.ps1 +++ b/tests/WorkspaceInstallRuntimeContract.Tests.ps1 @@ -26,6 +26,8 @@ Describe 'Workspace install runtime contract' { $script:scriptContent | Should -Match "Get-Command 'pwsh'" $script:scriptContent | Should -Match "Required command 'powershell' \(or fallback 'pwsh'\)" $script:scriptContent | Should -Match 'foreach \(\$commandName in @\(''git'', ''gh'', ''g-cli''\)\)' + $script:scriptContent | Should -Match 'Optional command ''\$commandName'' was not found on PATH for ContainerSmoke context.' + $script:scriptContent | Should -Match 'ContainerSmoke context forces LVIE_OFFLINE_GIT_MODE behavior.' 
$script:scriptContent | Should -Match 'invalid pinned_sha' $script:scriptContent | Should -Match 'head_sha_mismatch' $script:scriptContent | Should -Match 'remote_mismatch_' @@ -50,9 +52,12 @@ Describe 'Workspace install runtime contract' { $script:scriptContent | Should -Match 'LVIE_INSTALLER_EXECUTION_PROFILE' $script:scriptContent | Should -Match 'host-release' $script:scriptContent | Should -Match 'container-parity' + $script:scriptContent | Should -Match 'ContainerSmoke context skips repository contract enforcement.' $script:scriptContent | Should -Match 'required_ppl_bitnesses' $script:scriptContent | Should -Match 'required_vip_bitness' $script:scriptContent | Should -Match '-InstallerExecutionContext NsisInstall' + $script:scriptContent | Should -Match 'ContainerSmoke context skips runner-cli PPL capability gates.' + $script:scriptContent | Should -Match 'ContainerSmoke context skips runner-cli VIP harness gate.' $script:scriptContent | Should -Match 'Container parity profile requires LVIE_GATE_SINGLE_PPL_BITNESS' $script:scriptContent | Should -Match 'Invoke-RunnerCliPplCapabilityCheck' $script:scriptContent | Should -Match 'Test-PplBuildLabVIEWVersionAlignment' diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 4a4cf9b..6e5872f 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -31,6 +31,10 @@ Describe 'Workspace surface contract' { $script:installFromReleaseScriptPath = Join-Path $script:repoRoot 'scripts/Install-WorkspaceInstallerFromRelease.ps1' $script:testReleaseClientContractsScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseClientContracts.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' + $script:windowsContainerNsisSelfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' + $script:windowsContainerNsisDockerfilePath = 
Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' + $script:linuxContainerNsisParityScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-LinuxContainerNsisParity.ps1' + $script:linuxContainerNsisDockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-linux/Dockerfile' $script:nsisInstallerPath = Join-Path $script:repoRoot 'nsis/workspace-bootstrap-installer.nsi' $script:ciWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ci.yml' $script:driftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/workspace-sha-drift-signal.yml' @@ -41,6 +45,7 @@ Describe 'Workspace surface contract' { $script:releaseCoreWorkflowPath = Join-Path $script:repoRoot '.github/workflows/_release-workspace-installer-core.yml' $script:releaseWithGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-with-windows-gate.yml' $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' + $script:linuxNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-linux-nsis-parity-image.yml' $script:windowsImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/windows-labview-image-gate.yml' $script:windowsImageGateCoreWorkflowPath = Join-Path $script:repoRoot '.github/workflows/_windows-labview-image-gate-core.yml' $script:linuxImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/linux-labview-image-gate.yml' @@ -83,6 +88,10 @@ Describe 'Workspace surface contract' { $script:installFromReleaseScriptPath, $script:testReleaseClientContractsScriptPath, $script:dockerLinuxIterationScriptPath, + $script:windowsContainerNsisSelfTestScriptPath, + $script:windowsContainerNsisDockerfilePath, + $script:linuxContainerNsisParityScriptPath, + $script:linuxContainerNsisDockerfilePath, $script:nsisInstallerPath, $script:ciWorkflowPath, $script:driftWorkflowPath, @@ -93,6 +102,7 @@ Describe 'Workspace surface contract' { 
$script:releaseCoreWorkflowPath, $script:releaseWithGateWorkflowPath, $script:canaryWorkflowPath, + $script:linuxNsisParityImagePublishWorkflowPath, $script:windowsImageGateWorkflowPath, $script:windowsImageGateCoreWorkflowPath, $script:linuxImageGateWorkflowPath, @@ -368,6 +378,9 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'WorkspaceShaRefreshPrContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'WorkspaceManifestPinRefreshScript\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxLabviewImageGateWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'LinuxContainerNsisParityContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'LinuxNsisParityImagePublishWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'WindowsContainerNsisSelfTestContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'IsolatedBuildWorkspacePolicyContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'GitSafeDirectoryPolicyContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'ENABLE_SELF_HOSTED_CONTRACTS' diff --git a/tools/nsis-selftest-linux/Dockerfile b/tools/nsis-selftest-linux/Dockerfile new file mode 100644 index 0000000..2eca7dc --- /dev/null +++ b/tools/nsis-selftest-linux/Dockerfile @@ -0,0 +1,35 @@ +FROM nationalinstruments/labview:2026q1-linux + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + desktop-file-utils \ + gpg \ + gtk-update-icon-cache \ + libglu1-mesa \ + libx11-6 \ + libxinerama1 \ + xvfb \ + && mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg \ + && chmod go+r /etc/apt/keyrings/microsoft.gpg \ + && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] 
https://packages.microsoft.com/ubuntu/22.04/prod jammy main" > /etc/apt/sources.list.d/microsoft-prod.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + dotnet-sdk-8.0 \ + git \ + jq \ + nsis \ + powershell \ + unzip \ + xz-utils \ + zip \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace diff --git a/tools/nsis-selftest-linux/README.md b/tools/nsis-selftest-linux/README.md new file mode 100644 index 0000000..2f2ade3 --- /dev/null +++ b/tools/nsis-selftest-linux/README.md @@ -0,0 +1,28 @@ +# Linux NSIS Parity Runtime + +This image mirrors the containerized NSIS self-test posture on Linux, aligned to `nationalinstruments/labview:2026q1-linux`. + +## Purpose + +- Provide a Linux parity runtime that includes LabVIEW base image context plus NSIS, git, dotnet, and PowerShell. +- Exercise deterministic toolchain probes and NSIS smoke compile from the same runtime used for Linux parity lanes. +- Emit machine-readable parity reports via `scripts/Invoke-LinuxContainerNsisParity.ps1`. +- Keep dependency installation apt-driven for Ubuntu 22.04 parity, consistent with NI container guidance in `docs/linux-custom-images.md`. 
+ +## Included tooling + +- Base image: `nationalinstruments/labview:2026q1-linux` +- Microsoft apt feed (`packages.microsoft.com/ubuntu/22.04/prod`) +- `.NET SDK` via `dotnet-sdk-8.0` (apt) +- `PowerShell` via `powershell` (apt) +- `git`, `jq`, `nsis` +- LabVIEW-supporting Linux dependencies kept explicit: `desktop-file-utils`, `gtk-update-icon-cache`, `libglu1-mesa`, `libx11-6`, `libxinerama1`, `xvfb` + +## Build manually + +```powershell +docker build ` + -f .\tools\nsis-selftest-linux\Dockerfile ` + -t labview-cdev-surface-nsis-linux-parity:local ` + .\tools\nsis-selftest-linux +``` diff --git a/tools/nsis-selftest-windows/Dockerfile b/tools/nsis-selftest-windows/Dockerfile new file mode 100644 index 0000000..0d5df22 --- /dev/null +++ b/tools/nsis-selftest-windows/Dockerfile @@ -0,0 +1,6 @@ +# escape=` +FROM nationalinstruments/labview:2026q1-windows + +SHELL ["powershell", "-NoLogo", "-NoProfile", "-Command", "$ErrorActionPreference='Stop'; $ProgressPreference='SilentlyContinue';"] + +WORKDIR C:\workspace diff --git a/tools/nsis-selftest-windows/README.md b/tools/nsis-selftest-windows/README.md new file mode 100644 index 0000000..1d163d3 --- /dev/null +++ b/tools/nsis-selftest-windows/README.md @@ -0,0 +1,30 @@ +# Windows NSIS Self-Test Runtime + +This image is the local runtime for `scripts/Invoke-WindowsContainerNsisSelfTest.ps1`. + +## Purpose + +- Build the workspace NSIS installer inside a Windows container. +- Run the installer in the same container for smoke validation. +- Validate install report output before the container exits. + +## Included tooling + +- Base image: `nationalinstruments/labview:2026q1-windows` +- Windows PowerShell (from base image) +- Host-mounted NSIS toolchain (`makensis.exe`) +- Host-prestaged payload containing bundled `runner-cli` + +## Host prerequisites + +- `C:\Program Files (x86)\NSIS\makensis.exe` available on host (override via wrapper parameters). 
+- Host `git` and `dotnet` available for payload staging (`Build-RunnerCliBundleFromManifest.ps1` runs on host before container launch). + +## Build manually + +```powershell +docker build ` + -f .\tools\nsis-selftest-windows\Dockerfile ` + -t labview-cdev-surface-nsis-selftest:local ` + .\tools\nsis-selftest-windows +``` From 49698f2ea22ced41d5da639a40578ca37c0cab18 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:17:53 -0800 Subject: [PATCH 08/60] feat: add windows NSIS parity image publish with silent self-test gate (#7) Co-authored-by: svelderrainruiz --- .github/workflows/ci.yml | 1 + .../publish-windows-nsis-parity-image.yml | 217 ++++++++++++++++++ README.md | 7 + ...rityImagePublishWorkflowContract.Tests.ps1 | 50 ++++ tests/WorkspaceSurfaceContract.Tests.ps1 | 3 + tools/nsis-selftest-windows/README.md | 9 +- 6 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/publish-windows-nsis-parity-image.yml create mode 100644 tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4946bcf..8ecae07 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,7 @@ jobs: './tests/PortableOpsRuntimeContract.Tests.ps1', './tests/OpsRuntimeImagePublishWorkflowContract.Tests.ps1', './tests/LinuxNsisParityImagePublishWorkflowContract.Tests.ps1', + './tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1', './tests/VsCodeTasksContract.Tests.ps1', './tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1', './tests/UploadArtifactRetryCompositeContract.Tests.ps1', diff --git a/.github/workflows/publish-windows-nsis-parity-image.yml b/.github/workflows/publish-windows-nsis-parity-image.yml new file mode 100644 index 0000000..436ff8f --- /dev/null +++ b/.github/workflows/publish-windows-nsis-parity-image.yml @@ -0,0 +1,217 @@ +name: publish-windows-nsis-parity-image + +on: + workflow_dispatch: + inputs: + 
promote_latest: + description: Also refresh the latest tag. + required: false + default: false + type: boolean + additional_tag: + description: Optional extra tag (for example canary or rc1). + required: false + default: '' + type: string + push: + branches: + - main + paths: + - tools/nsis-selftest-windows/Dockerfile + - tools/nsis-selftest-windows/README.md + - scripts/Invoke-WindowsContainerNsisSelfTest.ps1 + - scripts/Build-RunnerCliBundleFromManifest.ps1 + - scripts/Install-WorkspaceFromManifest.ps1 + - workspace-governance.json + - workspace-governance-payload/tools/cdev-cli/** + - .github/workflows/publish-windows-nsis-parity-image.yml + +permissions: + contents: read + packages: write + +concurrency: + group: publish-windows-nsis-parity-image-${{ github.ref }} + cancel-in-progress: false + +jobs: + publish: + name: Publish Windows NSIS Parity Image + runs-on: [self-hosted, windows, windows-containers, cdev-surface-windows-gate] + env: + IMAGE_REPO: ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity + BASE_TAG: 2026q1-windows + WINDOWS_DOCKER_CONTEXT: ${{ vars.WINDOWS_DOCKER_CONTEXT }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Resolve deterministic tags + id: resolve + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + $dateUtc = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') + $shortSha = $env:GITHUB_SHA.Substring(0, 12).ToLowerInvariant() + $promoteLatest = '${{ github.event.inputs.promote_latest }}' + $additionalTag = '${{ github.event.inputs.additional_tag }}' + + if ([string]::IsNullOrWhiteSpace($promoteLatest)) { + $promoteLatest = 'false' + } + if ($promoteLatest -notin @('true', 'false')) { + throw "promote_latest must be true or false (got '$promoteLatest')." + } + if (-not [string]::IsNullOrWhiteSpace($additionalTag) -and $additionalTag -notmatch '^[A-Za-z0-9._-]+$') { + throw "additional_tag must match ^[A-Za-z0-9._-]+$." 
+ } + + $tags = @( + "$env:IMAGE_REPO`:sha-$shortSha", + "$env:IMAGE_REPO`:$env:BASE_TAG-$dateUtc" + ) + if ($promoteLatest -eq 'true') { + $tags += "$env:IMAGE_REPO`:latest" + } + if (-not [string]::IsNullOrWhiteSpace($additionalTag)) { + $tags += "$env:IMAGE_REPO`:$additionalTag" + } + + $localImage = "labview-cdev-surface-nsis-windows-parity:selftest-$shortSha" + + "date_utc=$dateUtc" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "short_sha=$shortSha" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "local_image=$localImage" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "tags<<EOF" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + $tags | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "EOF" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + + - name: Assert Windows Docker engine + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + $dockerArgs = @() + if (-not [string]::IsNullOrWhiteSpace($env:WINDOWS_DOCKER_CONTEXT)) { + $dockerArgs += @('--context', $env:WINDOWS_DOCKER_CONTEXT) + } + + $osType = & docker @($dockerArgs + @('info', '--format', '{{.OSType}}')) 2>$null + if ($LASTEXITCODE -ne 0) { + throw "docker_info_failed: unable to query Docker engine OSType." + } + $osTypeNormalized = ([string]$osType).Trim().ToLowerInvariant() + if ($osTypeNormalized -ne 'windows') { + throw "windows_container_mode_required: Docker engine OSType is '$osTypeNormalized'." + } + + - name: Exercise silent installer in Windows container + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + $outputRoot = Join-Path $env:GITHUB_WORKSPACE 'artifacts\release\windows-container-nsis-selftest-publish' + $scriptArgs = @( + '-NoProfile', + '-ExecutionPolicy', 'Bypass', + '-File', '.\scripts\Invoke-WindowsContainerNsisSelfTest.ps1', + '-Image', '${{ steps.resolve.outputs.local_image }}', + '-BuildLocalImage', + '-OutputRoot', $outputRoot + ) + if (-not [string]::IsNullOrWhiteSpace($env:WINDOWS_DOCKER_CONTEXT)) { + $scriptArgs += @('-DockerContext', $env:WINDOWS_DOCKER_CONTEXT) + } + + & powershell @scriptArgs + if ($LASTEXITCODE -ne 0) { + throw "Invoke-WindowsContainerNsisSelfTest.ps1 failed with exit code $LASTEXITCODE." 
+ } + + $reportPath = Join-Path $outputRoot 'windows-container-nsis-selftest-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "Windows self-test report missing: $reportPath" + } + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + if ([string]$report.status -ne 'succeeded') { + throw "Windows self-test report status is '$([string]$report.status)' (expected 'succeeded')." + } + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Publish image tags + id: publish + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + $localImage = '${{ steps.resolve.outputs.local_image }}' + $imagePresent = (& docker image inspect $localImage 2>$null) + if ($LASTEXITCODE -ne 0 -or -not $imagePresent) { + throw "Local image missing after self-test build: $localImage" + } + + $tagsText = @' +${{ steps.resolve.outputs.tags }} +'@ + $tags = @($tagsText -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) + if ($tags.Count -eq 0) { + throw 'No publish tags resolved.' + } + + $digest = '' + foreach ($tag in $tags) { + & docker tag $localImage $tag + if ($LASTEXITCODE -ne 0) { + throw "docker tag failed for '$tag'." + } + + $pushOutput = & docker push $tag 2>&1 + if ($LASTEXITCODE -ne 0) { + $pushOutput | ForEach-Object { Write-Host $_ } + throw "docker push failed for '$tag'." + } + $pushOutput | ForEach-Object { Write-Host $_ } + + $pushText = ($pushOutput | Out-String) + $digestMatch = [regex]::Match($pushText, 'digest:\s*(sha256:[0-9a-f]{64})') + if (-not $digestMatch.Success) { + throw "Unable to parse digest from docker push output for '$tag'." 
+ } + + $tagDigest = $digestMatch.Groups[1].Value.ToLowerInvariant() + if ([string]::IsNullOrWhiteSpace($digest)) { + $digest = $tagDigest + } elseif ($digest -ne $tagDigest) { + throw "Digest mismatch across tags. expected '$digest' got '$tagDigest' for '$tag'." + } + } + + "digest=$digest" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + + - name: Publish summary + shell: powershell + run: | + $ErrorActionPreference = 'Stop' + + $tagsText = @' +${{ steps.resolve.outputs.tags }} +'@ + $tags = @($tagsText -split "`r?`n" | ForEach-Object { $_.Trim() } | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) + + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value '## Windows NSIS Parity Image Published' + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value '' + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Image: ``$env:IMAGE_REPO``" + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Digest: ``${{ steps.publish.outputs.digest }}``" + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Commit: ``$env:GITHUB_SHA``" + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Silent self-test: ``Invoke-WindowsContainerNsisSelfTest.ps1`` passed" + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value '- Tags:' + foreach ($tag in $tags) { + Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value " - ``$tag``" + } diff --git a/README.md b/README.md index 00f521b..27f78b3 100644 --- a/README.md +++ b/README.md @@ -211,12 +211,19 @@ pwsh -NoProfile -File .\scripts\Invoke-WindowsContainerNsisSelfTest.ps1 ` ``` This wrapper fails fast with `windows_container_mode_required` unless Docker reports `OSType=windows`. +The runtime stages manifest-pinned `cdev-cli` assets before building the installer, then executes the installer in silent mode (`/S`) inside the same container. 
Outputs are written under: - `artifacts\release\windows-container-nsis-selftest` - `container-report.json` - `windows-container-nsis-selftest-report.json` +Publish the Windows parity image to GHCR with deterministic tags and pre-publish silent-install gating: +- Workflow: `.github/workflows/publish-windows-nsis-parity-image.yml` +- Image repo: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity` +- Default tags: `sha-<12-char-commit>`, `2026q1-windows-` +- Optional manual tags: `latest` (`promote_latest=true`) and `additional_tag` + ## Linux NSIS parity container Use the Linux parity runtime aligned to `nationalinstruments/labview:2026q1-linux`: diff --git a/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 b/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..7823d31 --- /dev/null +++ b/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 @@ -0,0 +1,50 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Windows NSIS parity image publish workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/publish-windows-nsis-parity-image.yml' + + if (-not (Test-Path -LiteralPath $script:workflowPath -PathType Leaf)) { + throw "Windows NSIS parity image publish workflow missing: $script:workflowPath" + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + } + + It 'supports manual dispatch and deterministic main-path publish triggers including cdev-cli payload' { + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'tools/nsis-selftest-windows/Dockerfile' + $script:workflowContent | Should -Match 'scripts/Invoke-WindowsContainerNsisSelfTest\.ps1' + $script:workflowContent | Should 
-Match 'scripts/Build-RunnerCliBundleFromManifest\.ps1' + $script:workflowContent | Should -Match 'scripts/Install-WorkspaceFromManifest\.ps1' + $script:workflowContent | Should -Match 'workspace-governance-payload/tools/cdev-cli/\*\*' + $script:workflowContent | Should -Match 'workspace-governance\.json' + } + + It 'enforces windows container preflight and silent self-test gate before publish' { + $script:workflowContent | Should -Match 'runs-on:\s*\[self-hosted,\s*windows,\s*windows-containers,\s*cdev-surface-windows-gate\]' + $script:workflowContent | Should -Match 'windows_container_mode_required' + $script:workflowContent | Should -Match 'Invoke-WindowsContainerNsisSelfTest\.ps1' + $script:workflowContent | Should -Match '-BuildLocalImage' + $script:workflowContent | Should -Match 'windows-container-nsis-selftest-publish' + $script:workflowContent | Should -Match 'Silent self-test' + } + + It 'publishes to GHCR with package write permission and deterministic digest reporting' { + $script:workflowContent | Should -Match 'packages:\s*write' + $script:workflowContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity' + $script:workflowContent | Should -Match 'docker/login-action@v3' + $script:workflowContent | Should -Match 'docker push' + $script:workflowContent | Should -Match 'sha-\$shortSha' + $script:workflowContent | Should -Match 'BASE_TAG:\s*2026q1-windows' + $script:workflowContent | Should -Match '\$env:BASE_TAG-\$dateUtc' + $script:workflowContent | Should -Match 'digest=\$digest' + $script:workflowContent | Should -Match 'digestMatch\s*=\s*\[regex\]::Match' + $script:workflowContent | Should -Match 'sha256:\[0-9a-f\]\{64\}' + } +} diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 6e5872f..285e565 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -46,6 +46,7 @@ Describe 'Workspace surface contract' { 
$script:releaseWithGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-with-windows-gate.yml' $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' $script:linuxNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-linux-nsis-parity-image.yml' + $script:windowsNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-windows-nsis-parity-image.yml' $script:windowsImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/windows-labview-image-gate.yml' $script:windowsImageGateCoreWorkflowPath = Join-Path $script:repoRoot '.github/workflows/_windows-labview-image-gate-core.yml' $script:linuxImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/linux-labview-image-gate.yml' @@ -103,6 +104,7 @@ Describe 'Workspace surface contract' { $script:releaseWithGateWorkflowPath, $script:canaryWorkflowPath, $script:linuxNsisParityImagePublishWorkflowPath, + $script:windowsNsisParityImagePublishWorkflowPath, $script:windowsImageGateWorkflowPath, $script:windowsImageGateCoreWorkflowPath, $script:linuxImageGateWorkflowPath, @@ -380,6 +382,7 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'LinuxLabviewImageGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxContainerNsisParityContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxNsisParityImagePublishWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'WindowsNsisParityImagePublishWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'WindowsContainerNsisSelfTestContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'IsolatedBuildWorkspacePolicyContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'GitSafeDirectoryPolicyContract\.Tests\.ps1' diff --git a/tools/nsis-selftest-windows/README.md 
b/tools/nsis-selftest-windows/README.md index 1d163d3..0bbfab1 100644 --- a/tools/nsis-selftest-windows/README.md +++ b/tools/nsis-selftest-windows/README.md @@ -5,8 +5,9 @@ This image is the local runtime for `scripts/Invoke-WindowsContainerNsisSelfTest ## Purpose - Build the workspace NSIS installer inside a Windows container. -- Run the installer in the same container for smoke validation. +- Run the installer in the same container for silent smoke validation (`/S`). - Validate install report output before the container exits. +- Stage manifest-pinned `cdev-cli` payload assets before container execution. ## Included tooling @@ -28,3 +29,9 @@ docker build ` -t labview-cdev-surface-nsis-selftest:local ` .\tools\nsis-selftest-windows ``` + +## Publish from GitHub + +- Workflow: `.github/workflows/publish-windows-nsis-parity-image.yml` +- Image: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity` +- Publish is gated by `scripts/Invoke-WindowsContainerNsisSelfTest.ps1` and fails before push if silent install checks fail. 
From de6521f7da15e379763f8f0df58fa8b532c30f64 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:22:31 -0800 Subject: [PATCH 09/60] fix: correct windows parity publish workflow YAML tag parsing (#8) Co-authored-by: svelderrainruiz --- .../publish-windows-nsis-parity-image.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/publish-windows-nsis-parity-image.yml b/.github/workflows/publish-windows-nsis-parity-image.yml index 436ff8f..da60f04 100644 --- a/.github/workflows/publish-windows-nsis-parity-image.yml +++ b/.github/workflows/publish-windows-nsis-parity-image.yml @@ -79,10 +79,12 @@ jobs: } $localImage = "labview-cdev-surface-nsis-windows-parity:selftest-$shortSha" + $tagsCsv = [string]::Join('|', $tags) "date_utc=$dateUtc" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 "short_sha=$shortSha" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 "local_image=$localImage" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "tags_csv=$tagsCsv" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 "tags< Date: Thu, 26 Feb 2026 17:26:44 -0800 Subject: [PATCH 10/60] fix: auto-resolve Windows docker context for publish workflow (#9) * fix: auto-resolve windows docker context in parity image publish * fix: make windows parity publish manual-only and context-aware --------- Co-authored-by: svelderrainruiz --- .../publish-windows-nsis-parity-image.yml | 69 ++++++++++++------- README.md | 1 + ...rityImagePublishWorkflowContract.Tests.ps1 | 12 ++-- tools/nsis-selftest-windows/README.md | 1 + 4 files changed, 51 insertions(+), 32 deletions(-) diff --git a/.github/workflows/publish-windows-nsis-parity-image.yml b/.github/workflows/publish-windows-nsis-parity-image.yml index da60f04..ffc9b0b 100644 --- a/.github/workflows/publish-windows-nsis-parity-image.yml +++ b/.github/workflows/publish-windows-nsis-parity-image.yml @@ -13,18 +13,6 @@ on: required: 
false default: '' type: string - push: - branches: - - main - paths: - - tools/nsis-selftest-windows/Dockerfile - - tools/nsis-selftest-windows/README.md - - scripts/Invoke-WindowsContainerNsisSelfTest.ps1 - - scripts/Build-RunnerCliBundleFromManifest.ps1 - - scripts/Install-WorkspaceFromManifest.ps1 - - workspace-governance.json - - workspace-governance-payload/tools/cdev-cli/** - - .github/workflows/publish-windows-nsis-parity-image.yml permissions: contents: read @@ -89,31 +77,64 @@ jobs: $tags | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 "EOF" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - - name: Assert Windows Docker engine + - name: Resolve Windows Docker context + id: resolve_context shell: powershell run: | $ErrorActionPreference = 'Stop' - $dockerArgs = @() - if (-not [string]::IsNullOrWhiteSpace($env:WINDOWS_DOCKER_CONTEXT)) { - $dockerArgs += @('--context', $env:WINDOWS_DOCKER_CONTEXT) + $candidateContexts = New-Object System.Collections.Generic.List[string] + $requestedContext = $env:WINDOWS_DOCKER_CONTEXT + if (-not [string]::IsNullOrWhiteSpace($requestedContext)) { + $candidateContexts.Add($requestedContext) } + $candidateContexts.Add('') + $candidateContexts.Add('desktop-windows') - $osType = & docker @($dockerArgs + @('info', '--format', '{{.OSType}}')) 2>$null - if ($LASTEXITCODE -ne 0) { - throw "docker_info_failed: unable to query Docker engine OSType." 
+ $seen = @{} + $resolvedContext = $null + foreach ($candidate in $candidateContexts) { + if ($seen.ContainsKey($candidate)) { + continue + } + $seen[$candidate] = $true + + $dockerArgs = @() + if (-not [string]::IsNullOrWhiteSpace($candidate)) { + $dockerArgs += @('--context', $candidate) + } + + $osType = & docker @($dockerArgs + @('info', '--format', '{{.OSType}}')) 2>$null + if ($LASTEXITCODE -ne 0) { + continue + } + $osTypeNormalized = ([string]$osType).Trim().ToLowerInvariant() + if ($osTypeNormalized -eq 'windows') { + $resolvedContext = $candidate + break + } } - $osTypeNormalized = ([string]$osType).Trim().ToLowerInvariant() - if ($osTypeNormalized -ne 'windows') { - throw "windows_container_mode_required: Docker engine OSType is '$osTypeNormalized'." + + if ($null -eq $resolvedContext) { + throw "windows_container_mode_required: unable to resolve a Windows Docker context. Set repository variable WINDOWS_DOCKER_CONTEXT or switch Docker Desktop to Windows containers." } + if (-not [string]::IsNullOrWhiteSpace($resolvedContext)) { + & docker context use $resolvedContext | Out-Host + if ($LASTEXITCODE -ne 0) { + throw "docker_context_switch_failed: failed to activate Docker context '$resolvedContext'." 
+ } + } + + "resolved_windows_docker_context=$resolvedContext" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + - name: Exercise silent installer in Windows container shell: powershell run: | $ErrorActionPreference = 'Stop' $outputRoot = Join-Path $env:GITHUB_WORKSPACE 'artifacts\release\windows-container-nsis-selftest-publish' + $resolvedContext = '${{ steps.resolve_context.outputs.resolved_windows_docker_context }}' $scriptArgs = @( '-NoProfile', '-ExecutionPolicy', 'Bypass', @@ -122,8 +143,8 @@ jobs: '-BuildLocalImage', '-OutputRoot', $outputRoot ) - if (-not [string]::IsNullOrWhiteSpace($env:WINDOWS_DOCKER_CONTEXT)) { - $scriptArgs += @('-DockerContext', $env:WINDOWS_DOCKER_CONTEXT) + if (-not [string]::IsNullOrWhiteSpace($resolvedContext)) { + $scriptArgs += @('-DockerContext', $resolvedContext) } & powershell @scriptArgs diff --git a/README.md b/README.md index 27f78b3..5c199a1 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,7 @@ Outputs are written under: Publish the Windows parity image to GHCR with deterministic tags and pre-publish silent-install gating: - Workflow: `.github/workflows/publish-windows-nsis-parity-image.yml` +- Trigger mode: manual `workflow_dispatch` (publish contract is still validated by hosted-runner CI) - Image repo: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity` - Default tags: `sha-<12-char-commit>`, `2026q1-windows-` - Optional manual tags: `latest` (`promote_latest=true`) and `additional_tag` diff --git a/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 b/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 index 7823d31..e53504e 100644 --- a/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 +++ b/tests/WindowsNsisParityImagePublishWorkflowContract.Tests.ps1 @@ -15,15 +15,11 @@ Describe 'Windows NSIS parity image publish workflow contract' { $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw } - It 'supports manual dispatch 
and deterministic main-path publish triggers including cdev-cli payload' { + It 'is manually dispatched and enforced by hosted-runner CI contract coverage' { $script:workflowContent | Should -Match 'workflow_dispatch:' - $script:workflowContent | Should -Match 'push:' - $script:workflowContent | Should -Match 'tools/nsis-selftest-windows/Dockerfile' - $script:workflowContent | Should -Match 'scripts/Invoke-WindowsContainerNsisSelfTest\.ps1' - $script:workflowContent | Should -Match 'scripts/Build-RunnerCliBundleFromManifest\.ps1' - $script:workflowContent | Should -Match 'scripts/Install-WorkspaceFromManifest\.ps1' - $script:workflowContent | Should -Match 'workspace-governance-payload/tools/cdev-cli/\*\*' - $script:workflowContent | Should -Match 'workspace-governance\.json' + $script:workflowContent | Should -Not -Match 'push:' + $script:workflowContent | Should -Match 'promote_latest:' + $script:workflowContent | Should -Match 'additional_tag:' } It 'enforces windows container preflight and silent self-test gate before publish' { diff --git a/tools/nsis-selftest-windows/README.md b/tools/nsis-selftest-windows/README.md index 0bbfab1..36ab8da 100644 --- a/tools/nsis-selftest-windows/README.md +++ b/tools/nsis-selftest-windows/README.md @@ -33,5 +33,6 @@ docker build ` ## Publish from GitHub - Workflow: `.github/workflows/publish-windows-nsis-parity-image.yml` +- Trigger: manual `workflow_dispatch` (workflow shape is enforced by hosted-runner CI contract tests) - Image: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-nsis-windows-parity` - Publish is gated by `scripts/Invoke-WindowsContainerNsisSelfTest.ps1` and fails before push if silent install checks fail. 
From 72927f1b3519c687de32059c6b6ed472d04e4b0c Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:29:09 -0800 Subject: [PATCH 11/60] fix: harden windows publish context resolution error handling (#10) Co-authored-by: svelderrainruiz --- .../publish-windows-nsis-parity-image.yml | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-windows-nsis-parity-image.yml b/.github/workflows/publish-windows-nsis-parity-image.yml index ffc9b0b..f3086f0 100644 --- a/.github/workflows/publish-windows-nsis-parity-image.yml +++ b/.github/workflows/publish-windows-nsis-parity-image.yml @@ -104,8 +104,13 @@ jobs: $dockerArgs += @('--context', $candidate) } - $osType = & docker @($dockerArgs + @('info', '--format', '{{.OSType}}')) 2>$null - if ($LASTEXITCODE -ne 0) { + $osType = $null + try { + $osType = & docker @($dockerArgs + @('info', '--format', '{{.OSType}}')) 2>$null + } catch { + continue + } + if ($LASTEXITCODE -ne 0 -or $null -eq $osType) { continue } $osTypeNormalized = ([string]$osType).Trim().ToLowerInvariant() @@ -115,6 +120,35 @@ jobs: } } + if ($null -eq $resolvedContext) { + $dockerCliExe = Join-Path $env:ProgramFiles 'Docker\Docker\DockerCli.exe' + if (Test-Path -LiteralPath $dockerCliExe -PathType Leaf) { + try { + & $dockerCliExe -SwitchWindowsEngine | Out-Host + if ($LASTEXITCODE -eq 0) { + for ($attempt = 1; $attempt -le 12; $attempt++) { + Start-Sleep -Seconds 5 + $osTypeAfterSwitch = $null + try { + $osTypeAfterSwitch = & docker info --format '{{.OSType}}' 2>$null + } catch { + continue + } + if ($LASTEXITCODE -eq 0 -and $null -ne $osTypeAfterSwitch) { + $osTypeAfterSwitchNormalized = ([string]$osTypeAfterSwitch).Trim().ToLowerInvariant() + if ($osTypeAfterSwitchNormalized -eq 'windows') { + $resolvedContext = '' + break + } + } + } + } + } catch { + Write-Warning "docker_switch_windows_engine_failed: $($_.Exception.Message)" + } + } + } + if ($null -eq $resolvedContext) { throw 
"windows_container_mode_required: unable to resolve a Windows Docker context. Set repository variable WINDOWS_DOCKER_CONTEXT or switch Docker Desktop to Windows containers." } From ae3f98ed9289a5d55c9fefd338f9dd265bd8e80b Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:34:46 -0800 Subject: [PATCH 12/60] Lock CI to hosted runners and fix Windows publish pwsh shell (#11) Co-authored-by: svelderrainruiz --- .github/workflows/ci.yml | 9 +++++++++ .../workflows/publish-windows-nsis-parity-image.yml | 12 ++++++------ .github/workflows/release-control-plane.yml | 9 +++++++++ tests/CiWorkflowReliabilityContract.Tests.ps1 | 7 +++++++ tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 | 4 ++++ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ecae07..41d72e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,15 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Enforce hosted-runner lock + shell: pwsh + run: | + $ErrorActionPreference = 'Stop' + $runnerEnvironment = [string]$env:RUNNER_ENVIRONMENT + if ($runnerEnvironment -ne 'github-hosted') { + throw "hosted_runner_required: CI Pipeline must run on a github-hosted runner. 
actual='$runnerEnvironment'" + } + - name: Install Pester shell: pwsh run: | diff --git a/.github/workflows/publish-windows-nsis-parity-image.yml b/.github/workflows/publish-windows-nsis-parity-image.yml index f3086f0..c1e9aec 100644 --- a/.github/workflows/publish-windows-nsis-parity-image.yml +++ b/.github/workflows/publish-windows-nsis-parity-image.yml @@ -36,7 +36,7 @@ jobs: - name: Resolve deterministic tags id: resolve - shell: powershell + shell: pwsh run: | $ErrorActionPreference = 'Stop' @@ -79,7 +79,7 @@ jobs: - name: Resolve Windows Docker context id: resolve_context - shell: powershell + shell: pwsh run: | $ErrorActionPreference = 'Stop' @@ -163,7 +163,7 @@ jobs: "resolved_windows_docker_context=$resolvedContext" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - name: Exercise silent installer in Windows container - shell: powershell + shell: pwsh run: | $ErrorActionPreference = 'Stop' @@ -181,7 +181,7 @@ jobs: $scriptArgs += @('-DockerContext', $resolvedContext) } - & powershell @scriptArgs + & pwsh @scriptArgs if ($LASTEXITCODE -ne 0) { throw "Invoke-WindowsContainerNsisSelfTest.ps1 failed with exit code $LASTEXITCODE." 
} @@ -204,7 +204,7 @@ jobs: - name: Publish image tags id: publish - shell: powershell + shell: pwsh run: | $ErrorActionPreference = 'Stop' @@ -251,7 +251,7 @@ jobs: "digest=$digest" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - name: Publish summary - shell: powershell + shell: pwsh run: | $ErrorActionPreference = 'Stop' diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index b59501e..3fab778 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -50,6 +50,15 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Enforce hosted-runner lock + shell: pwsh + run: | + $ErrorActionPreference = 'Stop' + $runnerEnvironment = [string]$env:RUNNER_ENVIRONMENT + if ($runnerEnvironment -ne 'github-hosted') { + throw "hosted_runner_required: release-control-plane must run on a github-hosted runner. actual='$runnerEnvironment'" + } + - name: Execute autonomous release control plane shell: pwsh env: diff --git a/tests/CiWorkflowReliabilityContract.Tests.ps1 b/tests/CiWorkflowReliabilityContract.Tests.ps1 index 78131b4..006bf1c 100644 --- a/tests/CiWorkflowReliabilityContract.Tests.ps1 +++ b/tests/CiWorkflowReliabilityContract.Tests.ps1 @@ -20,6 +20,13 @@ Describe 'CI workflow reliability contract' { $script:workflowContent | Should -Match 'cancel-in-progress:\s*true' } + It 'locks the contract suite to github-hosted ubuntu CI runners' { + $script:workflowContent | Should -Match '(?ms)ci-pipeline:\s*.*?runs-on:\s*ubuntu-latest' + $script:workflowContent | Should -Match 'Enforce hosted-runner lock' + $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' + $script:workflowContent | Should -Match 'hosted_runner_required' + } + It 'uses reusable upload-artifact retry composite for workspace installer artifacts' { $script:workflowContent | Should -Match 'id:\s*upload-workspace-installer-artifact' $script:workflowContent | Should -Match 
'uses:\s*\./\.github/actions/upload-artifact-retry' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index bfc65cc..9d755ff 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -31,6 +31,10 @@ Describe 'Release control plane workflow contract' { } It 'runs autonomous control-plane runtime and uploads report' { + $script:workflowContent | Should -Match 'runs-on:\s*ubuntu-latest' + $script:workflowContent | Should -Match 'Enforce hosted-runner lock' + $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' + $script:workflowContent | Should -Match 'hosted_runner_required' $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' $script:workflowContent | Should -Match 'release-control-plane-report\.json' $script:workflowContent | Should -Match 'Release Control Plane Alert' From bb5296919cca6415c5969131f16f44b78447911e Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:42:29 -0800 Subject: [PATCH 13/60] Use bundled cdev-cli to execute Windows container installer smoke (#12) Co-authored-by: svelderrainruiz --- .../Invoke-WindowsContainerNsisSelfTest.ps1 | 44 +++++++++++++++++-- ...owsContainerNsisSelfTestContract.Tests.ps1 | 8 +++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 index d9b8dc0..ecd392f 100644 --- a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -193,6 +193,11 @@ $hostOut = '__OUTPUT_MOUNT__' $workspaceRoot = '__WORKSPACE_ROOT__' $workRoot = 'C:\workspace\nsis-selftest' $installerPath = Join-Path $workRoot 'lvie-cdev-workspace-installer-container-smoke.exe' +$cliExtractRoot = Join-Path $workRoot 'cdev-cli' +$cliBundleZipPath = Join-Path $payloadRoot 'tools\cdev-cli\cdev-cli-win-x64.zip' 
+$cliEntrypointPath = Join-Path $cliExtractRoot 'cdev-cli\scripts\Invoke-CdevCli.ps1' +$cliRunReportPath = Join-Path $hostOut 'cdev-cli-installer-run-report.json' +$cliRunStatus = '' $installReportPath = Join-Path $workspaceRoot 'artifacts\workspace-install-latest.json' $launchLogPath = Join-Path $workspaceRoot 'artifacts\workspace-installer-launch.log' $buildInstallerScript = Join-Path $repoRoot 'scripts\Build-WorkspaceBootstrapInstaller.ps1' @@ -218,6 +223,9 @@ try { if (-not (Test-Path -LiteralPath $containerMakensisPath -PathType Leaf)) { throw "Mounted NSIS binary not found: $containerMakensisPath" } + if (-not (Test-Path -LiteralPath $cliBundleZipPath -PathType Leaf)) { + throw "Mounted cdev-cli Windows bundle not found: $cliBundleZipPath" + } if (Test-Path -LiteralPath $workRoot -PathType Container) { Remove-Item -LiteralPath $workRoot -Recurse -Force @@ -227,14 +235,15 @@ try { } Ensure-Directory -Path $workRoot Ensure-Directory -Path $workspaceRoot + Ensure-Directory -Path $hostOut - & powershell -NoProfile -ExecutionPolicy Bypass -File $buildInstallerScript ` + & $buildInstallerScript ` -PayloadRoot $payloadRoot ` -OutputPath $installerPath ` -WorkspaceRootDefault $workspaceRoot ` -InstallerExecutionContext 'ContainerSmoke' ` -NsisRoot $nsisRoot ` - -Deterministic:$true | Out-Host + -Deterministic $true | Out-Host if ($LASTEXITCODE -ne 0) { throw "Build-WorkspaceBootstrapInstaller.ps1 failed in container with exit code $LASTEXITCODE" } @@ -245,11 +254,35 @@ try { $installerSha256 = (Get-FileHash -LiteralPath $installerPath -Algorithm SHA256).Hash.ToLowerInvariant() "{0} *{1}" -f $installerSha256, (Split-Path -Path $installerPath -Leaf) | Set-Content -LiteralPath "$installerPath.sha256" -Encoding ascii - & $installerPath '/S' | Out-Host + if (Test-Path -LiteralPath $cliExtractRoot -PathType Container) { + Remove-Item -LiteralPath $cliExtractRoot -Recurse -Force + } + Expand-Archive -LiteralPath $cliBundleZipPath -DestinationPath $cliExtractRoot -Force + if 
(-not (Test-Path -LiteralPath $cliEntrypointPath -PathType Leaf)) { + throw "cdev-cli entrypoint not found after bundle extraction: $cliEntrypointPath" + } + + & powershell -NoProfile -ExecutionPolicy Bypass -File $cliEntrypointPath ` + -ReportPath $cliRunReportPath ` + installer install ` + --installer-path $installerPath ` + --report-path $installReportPath | Out-Host $installerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } if ($installerExitCode -ne 0) { $reasonCode = 'installer_exit_nonzero' - throw "Installer failed in container with exit code $installerExitCode" + throw "cdev-cli installer install failed in container with exit code $installerExitCode" + } + + if (-not (Test-Path -LiteralPath $cliRunReportPath -PathType Leaf)) { + $reasonCode = 'install_report_missing' + throw "cdev-cli run report missing after installer operation: $cliRunReportPath" + } + + $cliRunReport = Get-Content -LiteralPath $cliRunReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $cliRunStatus = [string]$cliRunReport.status + if ($cliRunStatus -ne 'succeeded') { + $reasonCode = 'installer_exit_nonzero' + throw "cdev-cli run report status is '$cliRunStatus' (expected 'succeeded')." 
} if (-not (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { @@ -295,6 +328,9 @@ try { installer_path = $installerPath installer_sha256 = $installerSha256 installer_exit_code = $installerExitCode + cdev_cli_entrypoint_path = $cliEntrypointPath + cdev_cli_run_report_path = $cliRunReportPath + cdev_cli_status = $cliRunStatus install_report_path = $installReportPath install_report_status = $installReportStatus install_report_errors = @($installReportErrors) diff --git a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 index a519fd5..42267e3 100644 --- a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 +++ b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 @@ -29,8 +29,14 @@ Describe 'Windows container NSIS self-test contract' { $script:selfTestScriptContent | Should -Match 'Build-RunnerCliBundleFromManifest\.ps1' $script:selfTestScriptContent | Should -Match 'Build-WorkspaceBootstrapInstaller\.ps1' $script:selfTestScriptContent | Should -Match 'Convert-ManifestToWorkspace\.ps1' + $script:selfTestScriptContent | Should -Match 'cdev-cli-win-x64\.zip' + $script:selfTestScriptContent | Should -Match 'Invoke-CdevCli\.ps1' + $script:selfTestScriptContent | Should -Match 'installer install' + $script:selfTestScriptContent | Should -Match '--installer-path \$installerPath' + $script:selfTestScriptContent | Should -Match '--report-path \$installReportPath' + $script:selfTestScriptContent | Should -Match 'cdev-cli-installer-run-report\.json' $script:selfTestScriptContent | Should -Match '-InstallerExecutionContext ''ContainerSmoke''' - $script:selfTestScriptContent | Should -Match '''/S''' + $script:selfTestScriptContent | Should -Match '-Deterministic \$true' $script:selfTestScriptContent | Should -Match 'workspace-install-latest\.json' $script:selfTestScriptContent | Should -Match 'container-report\.json' $script:selfTestScriptContent | Should -Match 'windows-container-nsis-selftest-report\.json' 
From 1f4eb0a16b3a03ee70c5c32bf534e21729a79283 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:45:30 -0800 Subject: [PATCH 14/60] Pass cdev-cli installer args via CommandArgs in container smoke (#13) Co-authored-by: svelderrainruiz --- scripts/Invoke-WindowsContainerNsisSelfTest.ps1 | 9 ++++++--- tests/WindowsContainerNsisSelfTestContract.Tests.ps1 | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 index ecd392f..706d9d4 100644 --- a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -264,9 +264,12 @@ try { & powershell -NoProfile -ExecutionPolicy Bypass -File $cliEntrypointPath ` -ReportPath $cliRunReportPath ` - installer install ` - --installer-path $installerPath ` - --report-path $installReportPath | Out-Host + -CommandArgs @( + 'installer', + 'install', + '--installer-path', $installerPath, + '--report-path', $installReportPath + ) | Out-Host $installerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } if ($installerExitCode -ne 0) { $reasonCode = 'installer_exit_nonzero' diff --git a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 index 42267e3..c47e3a0 100644 --- a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 +++ b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 @@ -32,8 +32,9 @@ Describe 'Windows container NSIS self-test contract' { $script:selfTestScriptContent | Should -Match 'cdev-cli-win-x64\.zip' $script:selfTestScriptContent | Should -Match 'Invoke-CdevCli\.ps1' $script:selfTestScriptContent | Should -Match 'installer install' - $script:selfTestScriptContent | Should -Match '--installer-path \$installerPath' - $script:selfTestScriptContent | Should -Match '--report-path \$installReportPath' + $script:selfTestScriptContent | Should -Match '-CommandArgs @\(' + 
$script:selfTestScriptContent | Should -Match '''--installer-path'', \$installerPath' + $script:selfTestScriptContent | Should -Match '''--report-path'', \$installReportPath' $script:selfTestScriptContent | Should -Match 'cdev-cli-installer-run-report\.json' $script:selfTestScriptContent | Should -Match '-InstallerExecutionContext ''ContainerSmoke''' $script:selfTestScriptContent | Should -Match '-Deterministic \$true' From 621d4cc613366cf8567c61a5d505bc82706c49f6 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:48:22 -0800 Subject: [PATCH 15/60] Invoke cdev-cli script directly in Windows container self-test (#14) Co-authored-by: svelderrainruiz --- scripts/Invoke-WindowsContainerNsisSelfTest.ps1 | 15 +++++++-------- ...WindowsContainerNsisSelfTestContract.Tests.ps1 | 3 ++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 index 706d9d4..77e7552 100644 --- a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -262,14 +262,13 @@ try { throw "cdev-cli entrypoint not found after bundle extraction: $cliEntrypointPath" } - & powershell -NoProfile -ExecutionPolicy Bypass -File $cliEntrypointPath ` - -ReportPath $cliRunReportPath ` - -CommandArgs @( - 'installer', - 'install', - '--installer-path', $installerPath, - '--report-path', $installReportPath - ) | Out-Host + $cliCommandArgs = @( + 'installer', + 'install', + '--installer-path', $installerPath, + '--report-path', $installReportPath + ) + & $cliEntrypointPath -ReportPath $cliRunReportPath -CommandArgs $cliCommandArgs | Out-Host $installerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } if ($installerExitCode -ne 0) { $reasonCode = 'installer_exit_nonzero' diff --git a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 index c47e3a0..d493815 100644 
--- a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 +++ b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 @@ -32,7 +32,8 @@ Describe 'Windows container NSIS self-test contract' { $script:selfTestScriptContent | Should -Match 'cdev-cli-win-x64\.zip' $script:selfTestScriptContent | Should -Match 'Invoke-CdevCli\.ps1' $script:selfTestScriptContent | Should -Match 'installer install' - $script:selfTestScriptContent | Should -Match '-CommandArgs @\(' + $script:selfTestScriptContent | Should -Match '\$cliCommandArgs = @\(' + $script:selfTestScriptContent | Should -Match '-CommandArgs \$cliCommandArgs' $script:selfTestScriptContent | Should -Match '''--installer-path'', \$installerPath' $script:selfTestScriptContent | Should -Match '''--report-path'', \$installReportPath' $script:selfTestScriptContent | Should -Match 'cdev-cli-installer-run-report\.json' From 05a7eefa1ce5d86fa4c6898af0e28b53be41ff8e Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 17:52:35 -0800 Subject: [PATCH 16/60] Handle cdev-cli installer report-path collision in container smoke (#15) Co-authored-by: svelderrainruiz --- .../Invoke-WindowsContainerNsisSelfTest.ps1 | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 index 77e7552..6fc6e1e 100644 --- a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -197,6 +197,7 @@ $cliExtractRoot = Join-Path $workRoot 'cdev-cli' $cliBundleZipPath = Join-Path $payloadRoot 'tools\cdev-cli\cdev-cli-win-x64.zip' $cliEntrypointPath = Join-Path $cliExtractRoot 'cdev-cli\scripts\Invoke-CdevCli.ps1' $cliRunReportPath = Join-Path $hostOut 'cdev-cli-installer-run-report.json' +$cliRunReportResolvedPath = '' $cliRunStatus = '' $installReportPath = Join-Path $workspaceRoot 'artifacts\workspace-install-latest.json' $launchLogPath = Join-Path $workspaceRoot 
'artifacts\workspace-installer-launch.log' @@ -275,35 +276,34 @@ try { throw "cdev-cli installer install failed in container with exit code $installerExitCode" } - if (-not (Test-Path -LiteralPath $cliRunReportPath -PathType Leaf)) { + $cliRunReportResolvedPath = $cliRunReportPath + if (-not (Test-Path -LiteralPath $cliRunReportResolvedPath -PathType Leaf) -and (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { + # Bundled cdev-cli versions may persist their run report to --report-path. + $cliRunReportResolvedPath = $installReportPath + } + if (-not (Test-Path -LiteralPath $cliRunReportResolvedPath -PathType Leaf)) { $reasonCode = 'install_report_missing' throw "cdev-cli run report missing after installer operation: $cliRunReportPath" } - $cliRunReport = Get-Content -LiteralPath $cliRunReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $cliRunReport = Get-Content -LiteralPath $cliRunReportResolvedPath -Raw | ConvertFrom-Json -ErrorAction Stop $cliRunStatus = [string]$cliRunReport.status if ($cliRunStatus -ne 'succeeded') { - $reasonCode = 'installer_exit_nonzero' + $reasonCode = if ((@($cliRunReport.errors) -join "`n") -match 'Expected installer report not found') { 'install_report_missing' } else { 'installer_exit_nonzero' } throw "cdev-cli run report status is '$cliRunStatus' (expected 'succeeded')." } - if (-not (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { - $reasonCode = 'install_report_missing' - throw "Install report not found after container smoke install: $installReportPath" - } - - $installReport = Get-Content -LiteralPath $installReportPath -Raw | ConvertFrom-Json -ErrorAction Stop - $installReportStatus = [string]$installReport.status - $installReportErrors = @($installReport.errors) - $installReportWarnings = @($installReport.warnings) - if ($installReportStatus -ne 'succeeded') { - $reasonCode = 'install_report_failed' - throw "Install report status is '$installReportStatus' (expected 'succeeded')." 
- } + $installReportStatus = $cliRunStatus + $installReportErrors = @($cliRunReport.errors) + $installReportWarnings = @($cliRunReport.warnings) Copy-Item -LiteralPath $installerPath -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe') -Force Copy-Item -LiteralPath "$installerPath.sha256" -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe.sha256') -Force - Copy-Item -LiteralPath $installReportPath -Destination (Join-Path $hostOut 'workspace-install-latest.container-smoke.json') -Force + if (Test-Path -LiteralPath $installReportPath -PathType Leaf) { + Copy-Item -LiteralPath $installReportPath -Destination (Join-Path $hostOut 'workspace-install-latest.container-smoke.json') -Force + } elseif (Test-Path -LiteralPath $cliRunReportResolvedPath -PathType Leaf) { + Copy-Item -LiteralPath $cliRunReportResolvedPath -Destination (Join-Path $hostOut 'workspace-install-latest.container-smoke.json') -Force + } if (Test-Path -LiteralPath $launchLogPath -PathType Leaf) { Copy-Item -LiteralPath $launchLogPath -Destination (Join-Path $hostOut 'workspace-installer-launch.container-smoke.log') -Force } @@ -331,7 +331,7 @@ try { installer_sha256 = $installerSha256 installer_exit_code = $installerExitCode cdev_cli_entrypoint_path = $cliEntrypointPath - cdev_cli_run_report_path = $cliRunReportPath + cdev_cli_run_report_path = if ([string]::IsNullOrWhiteSpace($cliRunReportResolvedPath)) { $cliRunReportPath } else { $cliRunReportResolvedPath } cdev_cli_status = $cliRunStatus install_report_path = $installReportPath install_report_status = $installReportStatus From 98033fb63241ec0044eb6fb77485e53ec32acba8 Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Thu, 26 Feb 2026 18:05:58 -0800 Subject: [PATCH 17/60] Stabilize container smoke with cdev-cli installer execution (#16) Co-authored-by: svelderrainruiz --- nsis/workspace-bootstrap-installer.nsi | 7 +++ .../Invoke-WindowsContainerNsisSelfTest.ps1 | 59 
+++++++++++++++---- ...owsContainerNsisSelfTestContract.Tests.ps1 | 11 ++++ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/nsis/workspace-bootstrap-installer.nsi b/nsis/workspace-bootstrap-installer.nsi index 46a8f6b..a878065 100644 --- a/nsis/workspace-bootstrap-installer.nsi +++ b/nsis/workspace-bootstrap-installer.nsi @@ -95,6 +95,13 @@ Section "Install" Goto labview_x86_ready ${EndIf} + !if "${INSTALL_EXEC_CONTEXT}" == "ContainerSmoke" + FileOpen $2 "${WORKSPACE_ROOT}\${LAUNCH_LOG_REL}" a + FileWrite $2 "x86_bootstrap_skipped_for_container_smoke=true$\r$\n" + FileClose $2 + Goto labview_x86_ready + !endif + IfFileExists "$4" labview_x86_ready 0 ReadEnvStr $5 "${X86_NIPKG_ENV}" FileOpen $2 "${WORKSPACE_ROOT}\${LAUNCH_LOG_REL}" a diff --git a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 index 6fc6e1e..c805e63 100644 --- a/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 +++ b/scripts/Invoke-WindowsContainerNsisSelfTest.ps1 @@ -55,6 +55,17 @@ function Ensure-Directory { } } +function Clear-DirectoryContents { + param([Parameter(Mandatory = $true)][string]$Path) + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + return + } + + Get-ChildItem -LiteralPath $Path -Force -ErrorAction SilentlyContinue | ForEach-Object { + Remove-Item -LiteralPath $_.FullName -Recurse -Force -ErrorAction Stop + } +} + function Assert-Command { param([Parameter(Mandatory = $true)][string]$Name) if (-not (Get-Command $Name -ErrorAction SilentlyContinue)) { @@ -179,6 +190,17 @@ function Ensure-Directory { } } +function Clear-DirectoryContents { + param([Parameter(Mandatory = $true)][string]$Path) + if (-not (Test-Path -LiteralPath $Path -PathType Container)) { + return + } + + Get-ChildItem -LiteralPath $Path -Force -ErrorAction SilentlyContinue | ForEach-Object { + Remove-Item -LiteralPath $_.FullName -Recurse -Force -ErrorAction Stop + } +} + function Assert-Command { param([Parameter(Mandatory = 
$true)][string]$Name) if (-not (Get-Command $Name -ErrorAction SilentlyContinue)) { @@ -231,9 +253,7 @@ try { if (Test-Path -LiteralPath $workRoot -PathType Container) { Remove-Item -LiteralPath $workRoot -Recurse -Force } - if (Test-Path -LiteralPath $workspaceRoot -PathType Container) { - Remove-Item -LiteralPath $workspaceRoot -Recurse -Force - } + Clear-DirectoryContents -Path $workspaceRoot Ensure-Directory -Path $workRoot Ensure-Directory -Path $workspaceRoot Ensure-Directory -Path $hostOut @@ -263,6 +283,15 @@ try { throw "cdev-cli entrypoint not found after bundle extraction: $cliEntrypointPath" } + Ensure-Directory -Path (Split-Path -Path $installReportPath -Parent) + if (-not (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { + [ordered]@{ + timestamp_utc = (Get-Date).ToUniversalTime().ToString('o') + status = 'seeded' + source = 'windows-container-nsis-selftest' + } | ConvertTo-Json -Depth 4 | Set-Content -LiteralPath $installReportPath -Encoding utf8 + } + $cliCommandArgs = @( 'installer', 'install', @@ -271,10 +300,6 @@ try { ) & $cliEntrypointPath -ReportPath $cliRunReportPath -CommandArgs $cliCommandArgs | Out-Host $installerExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } - if ($installerExitCode -ne 0) { - $reasonCode = 'installer_exit_nonzero' - throw "cdev-cli installer install failed in container with exit code $installerExitCode" - } $cliRunReportResolvedPath = $cliRunReportPath if (-not (Test-Path -LiteralPath $cliRunReportResolvedPath -PathType Leaf) -and (Test-Path -LiteralPath $installReportPath -PathType Leaf)) { @@ -288,14 +313,19 @@ try { $cliRunReport = Get-Content -LiteralPath $cliRunReportResolvedPath -Raw | ConvertFrom-Json -ErrorAction Stop $cliRunStatus = [string]$cliRunReport.status + $cliRunErrors = if ($null -ne $cliRunReport.PSObject.Properties['errors']) { @($cliRunReport.errors) } else { @() } + $cliRunWarnings = if ($null -ne $cliRunReport.PSObject.Properties['warnings']) { 
@($cliRunReport.warnings) } else { @() } + $installReportStatus = $cliRunStatus + $installReportErrors = $cliRunErrors + $installReportWarnings = $cliRunWarnings if ($cliRunStatus -ne 'succeeded') { - $reasonCode = if ((@($cliRunReport.errors) -join "`n") -match 'Expected installer report not found') { 'install_report_missing' } else { 'installer_exit_nonzero' } + $reasonCode = if (($cliRunErrors -join "`n") -match 'Expected installer report not found') { 'install_report_missing' } else { 'installer_exit_nonzero' } throw "cdev-cli run report status is '$cliRunStatus' (expected 'succeeded')." } - - $installReportStatus = $cliRunStatus - $installReportErrors = @($cliRunReport.errors) - $installReportWarnings = @($cliRunReport.warnings) + if ($installerExitCode -ne 0) { + $reasonCode = 'installer_exit_nonzero' + throw "cdev-cli installer install returned nonzero exit code $installerExitCode." + } Copy-Item -LiteralPath $installerPath -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe') -Force Copy-Item -LiteralPath "$installerPath.sha256" -Destination (Join-Path $hostOut 'lvie-cdev-workspace-installer-container-smoke.exe.sha256') -Force @@ -360,6 +390,9 @@ $dockerRepoVolume = ('{0}:{1}' -f $repoRoot, $ContainerRepoMount) $dockerOutputVolume = ('{0}:{1}' -f $resolvedOutputRoot, $ContainerOutputMount) $dockerPayloadVolume = ('{0}:{1}' -f $hostPayloadRoot, $ContainerPayloadMount) $dockerNsisVolume = ('{0}:{1}' -f $resolvedHostNsisRoot, $ContainerNsisMount) +$hostWorkspaceMirrorRoot = Join-Path $resolvedOutputRoot 'workspace-root' +Ensure-Directory -Path $hostWorkspaceMirrorRoot +$dockerWorkspaceVolume = ('{0}:{1}' -f $hostWorkspaceMirrorRoot, $ContainerWorkspaceRoot) $containerExitCode = 0 $status = 'unknown' $errors = @() @@ -378,6 +411,7 @@ try { '-v', $dockerOutputVolume, '-v', $dockerPayloadVolume, '-v', $dockerNsisVolume, + '-v', $dockerWorkspaceVolume, $Image, 'powershell', '-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', 
(Join-Path $ContainerOutputMount 'container-run.ps1') ) @@ -429,6 +463,7 @@ $endedUtc = (Get-Date).ToUniversalTime() output_root = $resolvedOutputRoot host_payload_root = $hostPayloadRoot host_nsis_root = $resolvedHostNsisRoot + host_workspace_root = $hostWorkspaceMirrorRoot container_workspace_root = $ContainerWorkspaceRoot container_exit_code = $containerExitCode container_report_path = $containerReportPath diff --git a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 index d493815..d0dfe46 100644 --- a/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 +++ b/tests/WindowsContainerNsisSelfTestContract.Tests.ps1 @@ -8,6 +8,7 @@ Describe 'Windows container NSIS self-test contract' { $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path $script:selfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' $script:dockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' + $script:installerNsisPath = Join-Path $script:repoRoot 'nsis/workspace-bootstrap-installer.nsi' if (-not (Test-Path -LiteralPath $script:selfTestScriptPath -PathType Leaf)) { throw "Windows container self-test script missing: $script:selfTestScriptPath" @@ -15,9 +16,13 @@ Describe 'Windows container NSIS self-test contract' { if (-not (Test-Path -LiteralPath $script:dockerfilePath -PathType Leaf)) { throw "Windows self-test Dockerfile missing: $script:dockerfilePath" } + if (-not (Test-Path -LiteralPath $script:installerNsisPath -PathType Leaf)) { + throw "Workspace bootstrap NSIS script missing: $script:installerNsisPath" + } $script:selfTestScriptContent = Get-Content -LiteralPath $script:selfTestScriptPath -Raw $script:dockerfileContent = Get-Content -LiteralPath $script:dockerfilePath -Raw + $script:installerNsisContent = Get-Content -LiteralPath $script:installerNsisPath -Raw } It 'builds and runs a Windows containerized NSIS smoke install flow' { @@ 
-61,6 +66,12 @@ Describe 'Windows container NSIS self-test contract' { $script:dockerfileContent | Should -Not -Match 'nsis-' } + It 'skips x86 LabVIEW package bootstrap when installer context is ContainerSmoke' { + $script:installerNsisContent | Should -Match '!if\s*"\$\{INSTALL_EXEC_CONTEXT\}"\s*==\s*"ContainerSmoke"' + $script:installerNsisContent | Should -Match 'x86_bootstrap_skipped_for_container_smoke' + $script:installerNsisContent | Should -Match 'Goto labview_x86_ready' + } + It 'has parse-safe PowerShell syntax' { $tokens = $null $errors = $null From fe5d661a4ab41ffeabaa8282da13d2c06c2aba65 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 18:54:40 -0800 Subject: [PATCH 18/60] Decouple control-plane health gate and pin ops runtime base --- AGENTS.md | 15 +++++++++++++ README.md | 20 ++++++++++++++++++ scripts/Exercise-ReleaseControlPlaneLocal.ps1 | 4 ++++ scripts/Invoke-OpsAutoRemediation.ps1 | 10 +++++++++ scripts/Invoke-ReleaseControlPlane.ps1 | 4 ++++ scripts/Test-PolicyContracts.ps1 | 8 +++++++ scripts/Test-ReleaseClientContracts.ps1 | 8 +++++++ ...sAutoRemediationWorkflowContract.Tests.ps1 | 8 +++++++ tests/PortableOpsRuntimeContract.Tests.ps1 | 7 ++++--- tests/ReleaseClientPolicyContract.Tests.ps1 | 10 +++++++++ ...lPlaneLocalDockerHarnessContract.Tests.ps1 | 2 ++ ...easeControlPlaneWorkflowContract.Tests.ps1 | 8 +++++++ tests/WorkspaceSurfaceContract.Tests.ps1 | 21 +++++++++++++++++++ tools/ops-runtime/Dockerfile | 6 ++---- tools/ops-runtime/README.md | 3 +++ .../scripts/Test-PolicyContracts.ps1 | 8 +++++++ .../workspace-governance.json | 13 ++++++++++++ workspace-governance.json | 13 ++++++++++++ 18 files changed, 161 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4c0d709..5b6cc2f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -141,6 +141,13 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - primary repo: `svelderrainruiz/labview-cdev-cli` - mirror repo: 
`LabVIEW-Community-CI-CD/labview-cdev-cli` - strategy: `fork-and-upstream-full-sync` +- Runtime image metadata is required in `installer_contract.release_client.runtime_images`: + - cdev-cli runtime canonical repository: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime` + - cdev-cli runtime source repo: `LabVIEW-Community-CI-CD/labview-cdev-cli` + - cdev-cli runtime source commit: `8fef6f9192d81a14add28636c1100c109ae5e977` + - cdev-cli runtime digest: `sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` + - ops runtime repository: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` + - ops runtime base repository/digest: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` ## Installer Runtime Gate Contract - Installer runtime (`scripts/Install-WorkspaceFromManifest.ps1`) must fail fast if bundled `runner-cli` integrity checks fail. @@ -181,6 +188,11 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `sync_guard_missing` - `sync_guard_incomplete` - Failure path must upload `ops-monitoring-report.json` and update a single issue titled `Ops Monitoring Alert`. +- Release-control-plane health checks must use release-runner labels only (`self-hosted`, `windows`, `self-hosted-windows-lv`) when invoking `Invoke-OpsMonitoringSnapshot.ps1` from: + - `scripts/Invoke-ReleaseControlPlane.ps1` + - `scripts/Invoke-OpsAutoRemediation.ps1` + - `scripts/Exercise-ReleaseControlPlaneLocal.ps1` +- `.github/workflows/ops-monitoring.yml` remains strict-default and must keep Docker Desktop parity visibility labels in its default snapshot path (`windows-containers`, `user-session`, `cdev-surface-windows-gate`). - `.github/workflows/canary-smoke-tag-hygiene.yml` is the canary smoke tag retention workflow. - It must run `scripts/Invoke-CanarySmokeTagHygiene.ps1` and enforce deterministic keep-latest behavior for tags matching `v0.YYYYMMDD.N`. 
- `.github/workflows/ops-autoremediate.yml` is the deterministic remediation workflow and must run `scripts/Invoke-OpsAutoRemediation.ps1`. @@ -244,6 +256,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Use `scripts/Invoke-WindowsContainerNsisSelfTest.ps1` to build the workspace NSIS installer and run silent install (`/S`) inside the same Windows container with `ContainerSmoke` execution context; this image is aligned to `nationalinstruments/labview:2026q1-windows` and fails fast with `windows_container_mode_required` if Docker is not in Windows container mode. - Use `scripts/Invoke-LinuxContainerNsisParity.ps1 -DockerContext desktop-linux` for parity checks aligned to `nationalinstruments/labview:2026q1-linux`; this lane compiles NSIS smoke output but does not execute Windows installers on Linux. - Use `scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1` for local containerized release-control-plane exercise (`Validate` + `DryRun` default). +- Portable ops runtime image hierarchy is required: + - base image: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` + - derived image: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` - If Docker Desktop Linux context is unavailable, confirm `Microsoft-Hyper-V-All`, `VirtualMachinePlatform`, and `Microsoft-Windows-Subsystem-Linux` are enabled, then reboot before retrying. - Use `scripts/Test-RunnerCliBundleDeterminism.ps1` and `scripts/Test-WorkspaceInstallerDeterminism.ps1` locally before proposing release-tag publication. - Keep local iteration artifacts under `artifacts\release\iteration`. 
diff --git a/README.md b/README.md index 5c199a1..333658d 100644 --- a/README.md +++ b/README.md @@ -341,6 +341,13 @@ Fork/upstream cdev-cli synchronization policy starts with full sync metadata: - Mirror repo: `LabVIEW-Community-CI-CD/labview-cdev-cli` - Strategy: `fork-and-upstream-full-sync` +Runtime image metadata is codified in `installer_contract.release_client.runtime_images`: +- cdev-cli runtime canonical repository: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime` +- cdev-cli runtime source repo/commit: `LabVIEW-Community-CI-CD/labview-cdev-cli` @ `8fef6f9192d81a14add28636c1100c109ae5e977` +- cdev-cli runtime digest: `sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` +- ops runtime repository: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` +- ops runtime base repository/digest: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` + Release channel metadata can be set during publish with workflow input `release_channel` (`stable`, `prerelease`, `canary`). ## Ops monitoring and hygiene @@ -349,6 +356,10 @@ Release channel metadata can be set during publish with workflow input `release_ - runner availability drift (`runner_unavailable`) - cdev-cli sync-guard drift/failure (`sync_guard_failed`, `sync_guard_stale`, `sync_guard_missing`, `sync_guard_incomplete`) +Control-plane runner health is intentionally decoupled from Docker Desktop parity labels: +- `scripts/Invoke-ReleaseControlPlane.ps1` and `scripts/Invoke-OpsAutoRemediation.ps1` call ops monitoring with release-runner labels only (`self-hosted`, `windows`, `self-hosted-windows-lv`). +- `ops-monitoring.yml` keeps strict defaults for Docker Desktop Windows gate visibility (`self-hosted`, `windows`, `self-hosted-windows-lv`, `windows-containers`, `user-session`, `cdev-surface-windows-gate`). + Every run uploads `ops-monitoring-report.json`. 
On failure, automation updates a single tracking issue (`Ops Monitoring Alert`). `canary-smoke-tag-hygiene.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-CanarySmokeTagHygiene.ps1` to keep latest `v0.YYYYMMDD.N` canary smoke tag(s) for a UTC date and delete older tags deterministically. @@ -389,6 +400,9 @@ pwsh -NoProfile -File .\scripts\Invoke-ReleaseControlPlaneLocalDocker.ps1 ` This executes `scripts/Exercise-ReleaseControlPlaneLocal.ps1` in the portable ops container image and writes artifacts under: - `artifacts\release-control-plane-local` - Default container image: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1` +- 2-image hierarchy: + - Base: `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` + - Derived ops runtime: `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` For offline or container runtime fallback on the host: - add `-HostFallback` @@ -403,6 +417,12 @@ Deterministic tags: - `v1-YYYYMMDD` - `v1` (when `promote_v1=true`) +Ops runtime build policy: +- Base image is digest-pinned to canonical cdev-cli runtime: + - `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` +- Canonical consumer path remains org namespace: + - `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops` + Manual publish: ```powershell diff --git a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 index 7e26771..f5b00d1 100644 --- a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 +++ b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 @@ -95,9 +95,12 @@ function Add-StepResult { } try { + $releaseRunnerLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') + $opsSnapshotPath = Join-Path $resolvedOutputRoot 'ops-monitoring-report.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` + 
-RequiredRunnerLabels $releaseRunnerLabels ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $opsSnapshotPath if ($LASTEXITCODE -ne 0) { @@ -109,6 +112,7 @@ try { $opsRemediatePath = Join-Path $resolvedOutputRoot 'ops-autoremediate-report.json' & pwsh -NoProfile -File $opsRemediateScript ` -SurfaceRepository $Repository ` + -RequiredRunnerLabels $releaseRunnerLabels ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $opsRemediatePath if ($LASTEXITCODE -ne 0) { diff --git a/scripts/Invoke-OpsAutoRemediation.ps1 b/scripts/Invoke-OpsAutoRemediation.ps1 index 9571e15..06609bf 100644 --- a/scripts/Invoke-OpsAutoRemediation.ps1 +++ b/scripts/Invoke-OpsAutoRemediation.ps1 @@ -21,6 +21,14 @@ param( [ValidateRange(1, 168)] [int]$SyncGuardMaxAgeHours = 12, + [Parameter()] + [ValidateNotNullOrEmpty()] + [string[]]$RequiredRunnerLabels = @( + 'self-hosted', + 'windows', + 'self-hosted-windows-lv' + ), + [Parameter()] [ValidateRange(5, 180)] [int]$WatchTimeoutMinutes = 45, @@ -67,6 +75,7 @@ try { $preHealthPath = Join-Path $tempRoot 'pre-health.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $SurfaceRepository ` + -RequiredRunnerLabels $RequiredRunnerLabels ` -SyncGuardRepository $SyncGuardRepository ` -SyncGuardWorkflow $SyncGuardWorkflow ` -SyncGuardBranch $SyncGuardBranch ` @@ -138,6 +147,7 @@ try { try { & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $SurfaceRepository ` + -RequiredRunnerLabels $RequiredRunnerLabels ` -SyncGuardRepository $SyncGuardRepository ` -SyncGuardWorkflow $SyncGuardWorkflow ` -SyncGuardBranch $SyncGuardBranch ` diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index d3607cd..9c7410a 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -49,6 +49,7 @@ $opsRemediateScript = Join-Path $PSScriptRoot 'Invoke-OpsAutoRemediation.ps1' $dispatchWorkflowScript = Join-Path $PSScriptRoot 
'Dispatch-WorkflowAtRemoteHead.ps1' $watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' $canaryHygieneScript = Join-Path $PSScriptRoot 'Invoke-CanarySmokeTagHygiene.ps1' +$releaseRunnerLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatchWorkflowScript, $watchWorkflowScript, $canaryHygieneScript)) { if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { @@ -347,6 +348,7 @@ try { try { & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` + -RequiredRunnerLabels $releaseRunnerLabels ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $preHealthPath if ($LASTEXITCODE -eq 0) { @@ -364,6 +366,7 @@ try { $remediationPath = Join-Path $scratchRoot 'remediation.json' & pwsh -NoProfile -File $opsRemediateScript ` -SurfaceRepository $Repository ` + -RequiredRunnerLabels $releaseRunnerLabels ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $remediationPath if (Test-Path -LiteralPath $remediationPath -PathType Leaf) { @@ -374,6 +377,7 @@ try { $postHealthPath = Join-Path $scratchRoot 'post-health.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` + -RequiredRunnerLabels $releaseRunnerLabels ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $postHealthPath if ($LASTEXITCODE -ne 0) { diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 0068dab..61be9fd 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -160,6 +160,14 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_primary' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo) Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_mirror' -Passed 
([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo) Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_exists' -Passed ($null -ne $releaseClient.runtime_images) -Detail 'installer_contract.release_client.runtime_images' + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_repository' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_source_repo' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_source_commit' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit -eq '8fef6f9192d81a14add28636c1100c109ae5e977') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_digest' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 
'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) } $requiredSchemaFields = @( diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 8ffd212..3d2f9b3 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -83,6 +83,14 @@ if ($null -ne $releaseClient) { Add-Check -Name 'cdev_cli_sync_primary_repo' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo) Add-Check -Name 'cdev_cli_sync_mirror_repo' -Passed ([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo) Add-Check -Name 'cdev_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy) + Add-Check -Name 'runtime_images_exists' -Passed ($null -ne $releaseClient.runtime_images) -Detail 'installer_contract.release_client.runtime_images' + Add-Check -Name 'runtime_images_cdev_cli_runtime_canonical_repository' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository -eq 
'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository) + Add-Check -Name 'runtime_images_cdev_cli_runtime_source_repo' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo) + Add-Check -Name 'runtime_images_cdev_cli_runtime_source_commit' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit -eq '8fef6f9192d81a14add28636c1100c109ae5e977') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit) + Add-Check -Name 'runtime_images_cdev_cli_runtime_digest' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest) + Add-Check -Name 'runtime_images_ops_runtime_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) + Add-Check -Name 'runtime_images_ops_runtime_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) + Add-Check -Name 'runtime_images_ops_runtime_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) if ([DateTime]::Parse([string]$releaseClient.signature_policy.dual_mode_start_utc) -gt [DateTime]::Parse([string]$releaseClient.signature_policy.canary_enforce_utc)) { Add-Check -Name 
'signature_date_order_dual_before_canary' -Passed $false -Detail 'dual_mode_start_utc must be <= canary_enforce_utc' diff --git a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 index 3a6ca8e..8950b71 100644 --- a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 @@ -43,4 +43,12 @@ Describe 'Ops auto-remediation workflow contract' { $script:runtimeContent | Should -Match 'no_automatable_action' $script:runtimeContent | Should -Match 'remediation_failed' } + + It 'uses release-runner labels for control-plane remediation health checks' { + $script:runtimeContent | Should -Match "self-hosted',\s*'windows',\s*'self-hosted-windows-lv" + $script:runtimeContent | Should -Match 'RequiredRunnerLabels \$RequiredRunnerLabels' + $script:runtimeContent | Should -Not -Match 'windows-containers' + $script:runtimeContent | Should -Not -Match 'user-session' + $script:runtimeContent | Should -Not -Match 'cdev-surface-windows-gate' + } } diff --git a/tests/PortableOpsRuntimeContract.Tests.ps1 b/tests/PortableOpsRuntimeContract.Tests.ps1 index 86930db..a756175 100644 --- a/tests/PortableOpsRuntimeContract.Tests.ps1 +++ b/tests/PortableOpsRuntimeContract.Tests.ps1 @@ -18,9 +18,10 @@ Describe 'Portable ops runtime contract' { $script:wrapperContent = Get-Content -LiteralPath $script:wrapper -Raw } - It 'pins a PowerShell-based container runtime with git gh jq' { - $script:dockerContent | Should -Match 'mcr\.microsoft\.com/powershell' - $script:dockerContent | Should -Match 'git jq gh' + It 'pins cdev-cli runtime base by digest and resets entrypoint for ops scripts' { + $script:dockerContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $script:dockerContent | Should -Match 'ENTRYPOINT \[\]' + $script:dockerContent | Should -Match 'Install-Module -Name Pester' } It 'mounts 
workspace and forwards GH_TOKEN to containerized ops scripts' { diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index a2415a5..38a6527 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -45,6 +45,13 @@ Describe 'Release client policy contract' { $releaseClient.cdev_cli_sync.primary_repo | Should -Be 'svelderrainruiz/labview-cdev-cli' $releaseClient.cdev_cli_sync.mirror_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' $releaseClient.cdev_cli_sync.strategy | Should -Be 'fork-and-upstream-full-sync' + $releaseClient.runtime_images.cdev_cli_runtime.canonical_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $releaseClient.runtime_images.cdev_cli_runtime.source_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' + $releaseClient.runtime_images.cdev_cli_runtime.source_commit | Should -Be '8fef6f9192d81a14add28636c1100c109ae5e977' + $releaseClient.runtime_images.cdev_cli_runtime.digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $releaseClient.runtime_images.ops_runtime.repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops' + $releaseClient.runtime_images.ops_runtime.base_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $releaseClient.runtime_images.ops_runtime.base_digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' ($script:payloadManifest | ConvertTo-Json -Depth 100) | Should -Be ($script:manifest | ConvertTo-Json -Depth 100) } @@ -56,6 +63,9 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'svelderrainruiz/labview-cdev-surface' $script:policyScriptContent | Should -Match 'cdev_cli_sync_primary_repo' $script:policyScriptContent | Should -Match 'cdev_cli_sync_mirror_repo' + $script:policyScriptContent | Should 
-Match 'runtime_images_exists' + $script:policyScriptContent | Should -Match 'runtime_images_cdev_cli_runtime_canonical_repository' + $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' } It 'has parse-safe PowerShell syntax' { diff --git a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 index 8456af8..6ef9d4a 100644 --- a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 @@ -33,6 +33,8 @@ Describe 'Release control plane local Docker harness contract' { $script:harnessContent | Should -Match 'Invoke-OpsAutoRemediation\.ps1' $script:harnessContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' $script:harnessContent | Should -Match 'Write-OpsSloReport\.ps1' + $script:harnessContent | Should -Match 'RequiredRunnerLabels \$releaseRunnerLabels' + $script:harnessContent | Should -Match "self-hosted', 'windows', 'self-hosted-windows-lv" $script:harnessContent | Should -Match 'release-control-plane-local-summary\.json' } diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 9d755ff..2141b6f 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -56,4 +56,12 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'release_tag_range_exhausted' $script:runtimeContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' } + + It 'decouples control-plane runner health gate to release-runner labels' { + $script:runtimeContent | Should -Match 'RequiredRunnerLabels \$releaseRunnerLabels' + $script:runtimeContent | Should -Match "self-hosted', 'windows', 'self-hosted-windows-lv" + $script:runtimeContent | Should -Not -Match 'windows-containers' + $script:runtimeContent | Should -Not -Match 'user-session' + 
$script:runtimeContent | Should -Not -Match 'cdev-surface-windows-gate' + } } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 285e565..2e803cd 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -246,6 +246,13 @@ Describe 'Workspace surface contract' { $script:manifest.installer_contract.release_client.cdev_cli_sync.primary_repo | Should -Be 'svelderrainruiz/labview-cdev-cli' $script:manifest.installer_contract.release_client.cdev_cli_sync.mirror_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' $script:manifest.installer_contract.release_client.cdev_cli_sync.strategy | Should -Be 'fork-and-upstream-full-sync' + $script:manifest.installer_contract.release_client.runtime_images.cdev_cli_runtime.canonical_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $script:manifest.installer_contract.release_client.runtime_images.cdev_cli_runtime.source_repo | Should -Be 'LabVIEW-Community-CI-CD/labview-cdev-cli' + $script:manifest.installer_contract.release_client.runtime_images.cdev_cli_runtime.source_commit | Should -Be '8fef6f9192d81a14add28636c1100c109ae5e977' + $script:manifest.installer_contract.release_client.runtime_images.cdev_cli_runtime.digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops' + $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.base_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.base_digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' foreach ($repo in @($script:manifest.managed_repos)) { $repo.PSObject.Properties.Name | 
Should -Contain 'required_gh_repo' $repo.PSObject.Properties.Name | Should -Contain 'default_branch' @@ -326,6 +333,13 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'workspace-release-client-latest\.json' $script:agentsContent | Should -Match 'svelderrainruiz/labview-cdev-cli' $script:agentsContent | Should -Match 'LabVIEW-Community-CI-CD/labview-cdev-cli' + $script:agentsContent | Should -Match 'runtime_images' + $script:agentsContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $script:agentsContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' + $script:agentsContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $script:agentsContent | Should -Match 'release-runner labels only' + $script:agentsContent | Should -Match 'windows-containers' + $script:agentsContent | Should -Match 'cdev-surface-windows-gate' $script:readmeContent | Should -Match 'Workspace SHA Refresh PR' $script:readmeContent | Should -Match 'automation/sha-refresh' $script:readmeContent | Should -Match 'Invoke-CdevCli\.ps1' @@ -346,6 +360,13 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'Install-WorkspaceInstallerFromRelease\.ps1' $script:readmeContent | Should -Match 'workspace-release-state\.json' $script:readmeContent | Should -Match 'workspace-release-client-latest\.json' + $script:readmeContent | Should -Match 'runtime_images' + $script:readmeContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' + $script:readmeContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' + $script:readmeContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $script:readmeContent | Should -Match 'release-runner labels only' + $script:readmeContent | Should -Match 'windows-containers' + $script:readmeContent | Should -Match 'cdev-surface-windows-gate' } It 'documents Windows feature 
troubleshooting reporting contract for Docker gating' { diff --git a/tools/ops-runtime/Dockerfile b/tools/ops-runtime/Dockerfile index 3e6f6a4..858ab92 100644 --- a/tools/ops-runtime/Dockerfile +++ b/tools/ops-runtime/Dockerfile @@ -1,8 +1,6 @@ -FROM mcr.microsoft.com/powershell:7.4-ubuntu-22.04 +FROM ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423 -RUN apt-get update \ - && apt-get install -y --no-install-recommends git jq gh ca-certificates \ - && rm -rf /var/lib/apt/lists/* +ENTRYPOINT [] RUN pwsh -NoLogo -NoProfile -Command "Set-PSRepository -Name PSGallery -InstallationPolicy Trusted; Install-Module -Name Pester -Scope AllUsers -Force -MinimumVersion 5.5.0" diff --git a/tools/ops-runtime/README.md b/tools/ops-runtime/README.md index a3a466b..475a71e 100644 --- a/tools/ops-runtime/README.md +++ b/tools/ops-runtime/README.md @@ -5,6 +5,9 @@ This container is the portable Docker package for local ops exercises. 
Default image: - `ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops:v1` +Base image (digest pinned): +- `ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime@sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423` + Build locally: ```powershell diff --git a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 index 0068dab..61be9fd 100644 --- a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 +++ b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 @@ -160,6 +160,14 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_primary' -Passed ([string]$releaseClient.cdev_cli_sync.primary_repo -eq 'svelderrainruiz/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.primary_repo) Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_mirror' -Passed ([string]$releaseClient.cdev_cli_sync.mirror_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.cdev_cli_sync.mirror_repo) Add-Check -Scope 'manifest' -Name 'release_client_cli_sync_strategy' -Passed ([string]$releaseClient.cdev_cli_sync.strategy -eq 'fork-and-upstream-full-sync') -Detail ([string]$releaseClient.cdev_cli_sync.strategy) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_exists' -Passed ($null -ne $releaseClient.runtime_images) -Detail 'installer_contract.release_client.runtime_images' + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_repository' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.canonical_repository) + Add-Check -Scope 'manifest' -Name 
'release_client_runtime_images_cdev_cli_runtime_source_repo' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo -eq 'LabVIEW-Community-CI-CD/labview-cdev-cli') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_repo) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_source_commit' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit -eq '8fef6f9192d81a14add28636c1100c109ae5e977') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.source_commit) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_cdev_cli_runtime_digest' -Passed ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.cdev_cli_runtime.digest) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) + Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) } $requiredSchemaFields = @( diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index f122468..4723e75 
100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -240,6 +240,19 @@ "state_path": "C:\\dev\\artifacts\\workspace-release-state.json", "latest_report_path": "C:\\dev\\artifacts\\workspace-release-client-latest.json", "policy_path": "C:\\dev\\workspace-governance\\release-policy.json", + "runtime_images": { + "cdev_cli_runtime": { + "canonical_repository": "ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime", + "source_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", + "source_commit": "8fef6f9192d81a14add28636c1100c109ae5e977", + "digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" + }, + "ops_runtime": { + "repository": "ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops", + "base_repository": "ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime", + "base_digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" + } + }, "cdev_cli_sync": { "primary_repo": "svelderrainruiz/labview-cdev-cli", "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", diff --git a/workspace-governance.json b/workspace-governance.json index f122468..4723e75 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -240,6 +240,19 @@ "state_path": "C:\\dev\\artifacts\\workspace-release-state.json", "latest_report_path": "C:\\dev\\artifacts\\workspace-release-client-latest.json", "policy_path": "C:\\dev\\workspace-governance\\release-policy.json", + "runtime_images": { + "cdev_cli_runtime": { + "canonical_repository": "ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime", + "source_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", + "source_commit": "8fef6f9192d81a14add28636c1100c109ae5e977", + "digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" + }, + "ops_runtime": { + "repository": 
"ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops", + "base_repository": "ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime", + "base_digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" + } + }, "cdev_cli_sync": { "primary_repo": "svelderrainruiz/labview-cdev-cli", "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", From c1a803742257242a62e387a17dcc967576d92b92 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:00:25 -0800 Subject: [PATCH 19/60] Fix runner-label argument passing for control-plane workflows --- scripts/Exercise-ReleaseControlPlaneLocal.ps1 | 5 ++-- scripts/Invoke-OpsAutoRemediation.ps1 | 23 +++++++++++++++++-- scripts/Invoke-OpsMonitoringSnapshot.ps1 | 11 +++++++++ scripts/Invoke-ReleaseControlPlane.ps1 | 7 +++--- ...sAutoRemediationWorkflowContract.Tests.ps1 | 2 +- ...lPlaneLocalDockerHarnessContract.Tests.ps1 | 2 +- ...easeControlPlaneWorkflowContract.Tests.ps1 | 2 +- 7 files changed, 42 insertions(+), 10 deletions(-) diff --git a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 index f5b00d1..678373a 100644 --- a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 +++ b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 @@ -96,11 +96,12 @@ function Add-StepResult { try { $releaseRunnerLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') + $releaseRunnerLabelsCsv = [string]::Join(',', $releaseRunnerLabels) $opsSnapshotPath = Join-Path $resolvedOutputRoot 'ops-monitoring-report.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` - -RequiredRunnerLabels $releaseRunnerLabels ` + -RequiredRunnerLabelsCsv $releaseRunnerLabelsCsv ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $opsSnapshotPath if ($LASTEXITCODE -ne 0) { @@ -112,7 +113,7 @@ try { $opsRemediatePath = Join-Path $resolvedOutputRoot 'ops-autoremediate-report.json' & pwsh -NoProfile -File $opsRemediateScript ` -SurfaceRepository 
$Repository ` - -RequiredRunnerLabels $releaseRunnerLabels ` + -RequiredRunnerLabelsCsv $releaseRunnerLabelsCsv ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $opsRemediatePath if ($LASTEXITCODE -ne 0) { diff --git a/scripts/Invoke-OpsAutoRemediation.ps1 b/scripts/Invoke-OpsAutoRemediation.ps1 index 06609bf..56d0d94 100644 --- a/scripts/Invoke-OpsAutoRemediation.ps1 +++ b/scripts/Invoke-OpsAutoRemediation.ps1 @@ -29,6 +29,9 @@ param( 'self-hosted-windows-lv' ), + [Parameter()] + [string]$RequiredRunnerLabelsCsv = '', + [Parameter()] [ValidateRange(5, 180)] [int]$WatchTimeoutMinutes = 45, @@ -52,6 +55,22 @@ foreach ($requiredScript in @($opsSnapshotScript, $dispatchWorkflowScript, $watc } } +if (-not [string]::IsNullOrWhiteSpace($RequiredRunnerLabelsCsv)) { + $RequiredRunnerLabels = @( + $RequiredRunnerLabelsCsv.Split(',') | + ForEach-Object { ([string]$_).Trim() } | + Where-Object { -not [string]::IsNullOrWhiteSpace($_) } + ) +} +$normalizedRequiredRunnerLabels = @( + @($RequiredRunnerLabels | ForEach-Object { ([string]$_).Trim() } | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) | + Select-Object -Unique +) +if (@($normalizedRequiredRunnerLabels).Count -eq 0) { + throw 'required_runner_labels_empty' +} +$requiredRunnerLabelsCsv = [string]::Join(',', $normalizedRequiredRunnerLabels) + $tempRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("ops-auto-remediate-" + [Guid]::NewGuid().ToString('N')) New-Item -Path $tempRoot -ItemType Directory -Force | Out-Null @@ -75,7 +94,7 @@ try { $preHealthPath = Join-Path $tempRoot 'pre-health.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $SurfaceRepository ` - -RequiredRunnerLabels $RequiredRunnerLabels ` + -RequiredRunnerLabelsCsv $requiredRunnerLabelsCsv ` -SyncGuardRepository $SyncGuardRepository ` -SyncGuardWorkflow $SyncGuardWorkflow ` -SyncGuardBranch $SyncGuardBranch ` @@ -147,7 +166,7 @@ try { try { & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository 
$SurfaceRepository ` - -RequiredRunnerLabels $RequiredRunnerLabels ` + -RequiredRunnerLabelsCsv $requiredRunnerLabelsCsv ` -SyncGuardRepository $SyncGuardRepository ` -SyncGuardWorkflow $SyncGuardWorkflow ` -SyncGuardBranch $SyncGuardBranch ` diff --git a/scripts/Invoke-OpsMonitoringSnapshot.ps1 b/scripts/Invoke-OpsMonitoringSnapshot.ps1 index c970a68..e55a83d 100644 --- a/scripts/Invoke-OpsMonitoringSnapshot.ps1 +++ b/scripts/Invoke-OpsMonitoringSnapshot.ps1 @@ -16,6 +16,9 @@ param( 'cdev-surface-windows-gate' ), + [Parameter()] + [string]$RequiredRunnerLabelsCsv = '', + [Parameter()] [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] [string]$SyncGuardRepository = 'LabVIEW-Community-CI-CD/labview-cdev-cli', @@ -41,6 +44,14 @@ $ErrorActionPreference = 'Stop' . (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') +if (-not [string]::IsNullOrWhiteSpace($RequiredRunnerLabelsCsv)) { + $RequiredRunnerLabels = @( + $RequiredRunnerLabelsCsv.Split(',') | + ForEach-Object { ([string]$_).Trim() } | + Where-Object { -not [string]::IsNullOrWhiteSpace($_) } + ) +} + function Convert-RunRecord { param([Parameter(Mandatory = $true)][object]$Run) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 9c7410a..e4d725a 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -50,6 +50,7 @@ $dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead $watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' $canaryHygieneScript = Join-Path $PSScriptRoot 'Invoke-CanarySmokeTagHygiene.ps1' $releaseRunnerLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') +$releaseRunnerLabelsCsv = [string]::Join(',', $releaseRunnerLabels) foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatchWorkflowScript, $watchWorkflowScript, $canaryHygieneScript)) { if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { @@ -348,7 +349,7 @@ 
try { try { & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` - -RequiredRunnerLabels $releaseRunnerLabels ` + -RequiredRunnerLabelsCsv $releaseRunnerLabelsCsv ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $preHealthPath if ($LASTEXITCODE -eq 0) { @@ -366,7 +367,7 @@ try { $remediationPath = Join-Path $scratchRoot 'remediation.json' & pwsh -NoProfile -File $opsRemediateScript ` -SurfaceRepository $Repository ` - -RequiredRunnerLabels $releaseRunnerLabels ` + -RequiredRunnerLabelsCsv $releaseRunnerLabelsCsv ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $remediationPath if (Test-Path -LiteralPath $remediationPath -PathType Leaf) { @@ -377,7 +378,7 @@ try { $postHealthPath = Join-Path $scratchRoot 'post-health.json' & pwsh -NoProfile -File $opsSnapshotScript ` -SurfaceRepository $Repository ` - -RequiredRunnerLabels $releaseRunnerLabels ` + -RequiredRunnerLabelsCsv $releaseRunnerLabelsCsv ` -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -OutputPath $postHealthPath if ($LASTEXITCODE -ne 0) { diff --git a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 index 8950b71..eba099e 100644 --- a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 @@ -46,7 +46,7 @@ Describe 'Ops auto-remediation workflow contract' { It 'uses release-runner labels for control-plane remediation health checks' { $script:runtimeContent | Should -Match "self-hosted',\s*'windows',\s*'self-hosted-windows-lv" - $script:runtimeContent | Should -Match 'RequiredRunnerLabels \$RequiredRunnerLabels' + $script:runtimeContent | Should -Match 'RequiredRunnerLabelsCsv \$requiredRunnerLabelsCsv' $script:runtimeContent | Should -Not -Match 'windows-containers' $script:runtimeContent | Should -Not -Match 'user-session' $script:runtimeContent | Should -Not -Match 'cdev-surface-windows-gate' diff --git 
a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 index 6ef9d4a..fbeaab2 100644 --- a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 @@ -33,7 +33,7 @@ Describe 'Release control plane local Docker harness contract' { $script:harnessContent | Should -Match 'Invoke-OpsAutoRemediation\.ps1' $script:harnessContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' $script:harnessContent | Should -Match 'Write-OpsSloReport\.ps1' - $script:harnessContent | Should -Match 'RequiredRunnerLabels \$releaseRunnerLabels' + $script:harnessContent | Should -Match 'RequiredRunnerLabelsCsv \$releaseRunnerLabelsCsv' $script:harnessContent | Should -Match "self-hosted', 'windows', 'self-hosted-windows-lv" $script:harnessContent | Should -Match 'release-control-plane-local-summary\.json' } diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 2141b6f..51f272d 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -58,7 +58,7 @@ Describe 'Release control plane workflow contract' { } It 'decouples control-plane runner health gate to release-runner labels' { - $script:runtimeContent | Should -Match 'RequiredRunnerLabels \$releaseRunnerLabels' + $script:runtimeContent | Should -Match 'RequiredRunnerLabelsCsv \$releaseRunnerLabelsCsv' $script:runtimeContent | Should -Match "self-hosted', 'windows', 'self-hosted-windows-lv" $script:runtimeContent | Should -Not -Match 'windows-containers' $script:runtimeContent | Should -Not -Match 'user-session' From 89f20a79a928c161974fef1c3dc8cda1ca8d7e1b Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:16:33 -0800 Subject: [PATCH 20/60] Add Scope A control-plane hardening gates and incident lifecycle --- .github/workflows/ci.yml | 4 + 
.github/workflows/ops-autoremediate.yml | 49 +++-- .github/workflows/ops-monitoring.yml | 48 +++-- .github/workflows/ops-policy-drift-check.yml | 104 ++++++++++ .github/workflows/ops-slo-gate.yml | 154 ++++++++++++++ .github/workflows/release-control-plane.yml | 55 +++-- .github/workflows/release-rollback-drill.yml | 136 +++++++++++++ AGENTS.md | 25 +++ README.md | 33 ++- .../runbooks/release-ops-incident-response.md | 36 ++++ scripts/Invoke-OpsIncidentLifecycle.ps1 | 191 ++++++++++++++++++ scripts/Invoke-ReleaseRollbackDrill.ps1 | 183 +++++++++++++++++ scripts/Test-OpsSloGate.ps1 | 190 +++++++++++++++++ scripts/Test-PolicyContracts.ps1 | 12 ++ scripts/Test-ReleaseClientContracts.ps1 | 13 ++ .../Test-ReleaseControlPlanePolicyDrift.ps1 | 122 +++++++++++ ...sAutoRemediationWorkflowContract.Tests.ps1 | 4 +- tests/OpsIncidentLifecycleContract.Tests.ps1 | 38 ++++ tests/OpsMonitoringWorkflowContract.Tests.ps1 | 3 + .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 42 ++++ tests/OpsSloGateWorkflowContract.Tests.ps1 | 50 +++++ tests/ReleaseClientPolicyContract.Tests.ps1 | 17 ++ ...easeControlPlaneWorkflowContract.Tests.ps1 | 3 + ...aseRollbackDrillWorkflowContract.Tests.ps1 | 48 +++++ tests/ScopeAOpsRunbookContract.Tests.ps1 | 10 + tests/WorkspaceSurfaceContract.Tests.ps1 | 47 +++++ .../scripts/Test-PolicyContracts.ps1 | 12 ++ .../workspace-governance.json | 29 +++ workspace-governance.json | 29 +++ 29 files changed, 1632 insertions(+), 55 deletions(-) create mode 100644 .github/workflows/ops-policy-drift-check.yml create mode 100644 .github/workflows/ops-slo-gate.yml create mode 100644 .github/workflows/release-rollback-drill.yml create mode 100644 scripts/Invoke-OpsIncidentLifecycle.ps1 create mode 100644 scripts/Invoke-ReleaseRollbackDrill.ps1 create mode 100644 scripts/Test-OpsSloGate.ps1 create mode 100644 scripts/Test-ReleaseControlPlanePolicyDrift.ps1 create mode 100644 tests/OpsIncidentLifecycleContract.Tests.ps1 create mode 100644 
tests/OpsPolicyDriftWorkflowContract.Tests.ps1 create mode 100644 tests/OpsSloGateWorkflowContract.Tests.ps1 create mode 100644 tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41d72e3..746cf45 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,8 +79,12 @@ jobs: './tests/UploadArtifactRetryCompositeContract.Tests.ps1', './tests/InstallerHarnessWorkflowContract.Tests.ps1', './tests/OpsMonitoringWorkflowContract.Tests.ps1', + './tests/OpsIncidentLifecycleContract.Tests.ps1', './tests/OpsAutoRemediationWorkflowContract.Tests.ps1', + './tests/OpsSloGateWorkflowContract.Tests.ps1', + './tests/OpsPolicyDriftWorkflowContract.Tests.ps1', './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', + './tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1', './tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1', './tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1', './tests/NightlySupplyChainCanaryWorkflowContract.Tests.ps1', diff --git a/.github/workflows/ops-autoremediate.yml b/.github/workflows/ops-autoremediate.yml index 801c975..af06095 100644 --- a/.github/workflows/ops-autoremediate.yml +++ b/.github/workflows/ops-autoremediate.yml @@ -54,7 +54,7 @@ jobs: path: ${{ runner.temp }}/ops-autoremediate-report.json if-no-files-found: error - - name: Open or update auto-remediation incident issue on failure + - name: Update auto-remediation incident issue on failure if: failure() shell: pwsh env: @@ -80,20 +80,33 @@ jobs: - Sync guard repository: $($report.sync_guard_repository) "@ - $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 - if ($LASTEXITCODE -ne 0) { - throw 'Failed to query existing auto-remediation incident issue.' 
- } - $existing = @($existingJson | ConvertFrom-Json) - if ($existing.Count -gt 0) { - $number = [string]$existing[0].number - & gh issue comment $number -R $env:REPOSITORY --body $body - if ($LASTEXITCODE -ne 0) { - throw "Failed to append auto-remediation issue comment to #$number." - } - } else { - & gh issue create -R $env:REPOSITORY --title $title --body $body - if ($LASTEXITCODE -ne 0) { - throw 'Failed to create auto-remediation incident issue.' - } - } + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close auto-remediation incident issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Auto-Remediation Alert' + $body = @" + Ops auto-remediation recovered. 
+ + - Run: $env:RUN_URL + - Status: pass + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/ops-monitoring.yml b/.github/workflows/ops-monitoring.yml index 86499f2..4d5c5c3 100644 --- a/.github/workflows/ops-monitoring.yml +++ b/.github/workflows/ops-monitoring.yml @@ -55,7 +55,7 @@ jobs: path: ${{ runner.temp }}/ops-monitoring-report.json if-no-files-found: error - - name: Open or update ops monitoring tracking issue on failure + - name: Update ops monitoring tracking issue on failure if: failure() shell: pwsh env: @@ -85,21 +85,33 @@ jobs: - Sync guard repository: $($report.sync_guard.repository) "@ - $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 - if ($LASTEXITCODE -ne 0) { - throw 'Failed to query existing ops monitoring tracking issue.' - } + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body - $existing = @($existingJson | ConvertFrom-Json) - if ($existing.Count -gt 0) { - $number = [string]$existing[0].number - & gh issue comment $number -R $env:REPOSITORY --body $body - if ($LASTEXITCODE -ne 0) { - throw "Failed to append ops monitoring issue comment to #$number." - } - } else { - & gh issue create -R $env:REPOSITORY --title $title --body $body - if ($LASTEXITCODE -ne 0) { - throw 'Failed to create ops monitoring tracking issue.' 
- } - } + - name: Close ops monitoring tracking issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Monitoring Alert' + $body = @" + Ops monitoring recovered. + + - Run: $env:RUN_URL + - Status: pass + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/ops-policy-drift-check.yml b/.github/workflows/ops-policy-drift-check.yml new file mode 100644 index 0000000..0697a85 --- /dev/null +++ b/.github/workflows/ops-policy-drift-check.yml @@ -0,0 +1,104 @@ +name: ops-policy-drift-check + +on: + schedule: + - cron: '50 * * * *' + workflow_dispatch: + +permissions: + contents: read + issues: write + +jobs: + ops-policy-drift-check: + name: Ops Policy Drift Check + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Evaluate control-plane policy drift + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-policy-drift-report.json' + + & pwsh -NoProfile -File ./scripts/Test-ReleaseControlPlanePolicyDrift.ps1 ` + -ManifestPath ./workspace-governance.json ` + -PayloadManifestPath ./workspace-governance-payload/workspace-governance/workspace-governance.json ` + -OutputPath $reportPath + + - name: Upload policy drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: ops-policy-drift-report-${{ github.run_id }} + path: ${{ runner.temp }}/ops-policy-drift-report.json + if-no-files-found: error + + - name: Update ops policy drift issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} 
+ RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Policy Drift Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-policy-drift-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops policy drift report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Ops policy drift check failed. + + - Run: $env:RUN_URL + - Reason codes: $reasonCodeText + - Message: $($report.message) + - Manifest: $($report.manifest_path) + - Payload manifest: $($report.payload_manifest_path) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close ops policy drift issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops Policy Drift Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-policy-drift-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops policy drift report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Ops policy drift check recovered. 
+ + - Run: $env:RUN_URL + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/ops-slo-gate.yml b/.github/workflows/ops-slo-gate.yml new file mode 100644 index 0000000..bd77c40 --- /dev/null +++ b/.github/workflows/ops-slo-gate.yml @@ -0,0 +1,154 @@ +name: ops-slo-gate + +on: + schedule: + - cron: '30 8 * * *' + workflow_dispatch: + inputs: + lookback_days: + description: SLO gate lookback window in days. + required: false + default: '7' + type: string + min_success_rate_pct: + description: Minimum success rate threshold (0-100). + required: false + default: '100' + type: string + sync_guard_max_age_hours: + description: Maximum sync-guard success age in hours. + required: false + default: '12' + type: string + +permissions: + contents: read + actions: read + issues: write + +jobs: + ops-slo-gate: + name: Ops SLO Gate + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Evaluate ops SLO gate + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-slo-gate-report.json' + + $lookbackDaysText = [string]'${{ inputs.lookback_days }}' + $lookbackDays = 7 + if (-not [string]::IsNullOrWhiteSpace($lookbackDaysText)) { + $parsedLookback = 0 + if (-not [int]::TryParse($lookbackDaysText, [ref]$parsedLookback)) { + throw "lookback_days must be an integer. actual='$lookbackDaysText'" + } + $lookbackDays = $parsedLookback + } + + $minSuccessRatePctText = [string]'${{ inputs.min_success_rate_pct }}' + $minSuccessRatePct = 100.0 + if (-not [string]::IsNullOrWhiteSpace($minSuccessRatePctText)) { + $parsedSuccessRate = 0.0 + if (-not [double]::TryParse($minSuccessRatePctText, [ref]$parsedSuccessRate)) { + throw "min_success_rate_pct must be a number. 
actual='$minSuccessRatePctText'" + } + $minSuccessRatePct = $parsedSuccessRate + } + + $syncGuardMaxAgeHoursText = [string]'${{ inputs.sync_guard_max_age_hours }}' + $syncGuardMaxAgeHours = 12 + if (-not [string]::IsNullOrWhiteSpace($syncGuardMaxAgeHoursText)) { + $parsedMaxAge = 0 + if (-not [int]::TryParse($syncGuardMaxAgeHoursText, [ref]$parsedMaxAge)) { + throw "sync_guard_max_age_hours must be an integer. actual='$syncGuardMaxAgeHoursText'" + } + $syncGuardMaxAgeHours = $parsedMaxAge + } + + & pwsh -NoProfile -File ./scripts/Test-OpsSloGate.ps1 ` + -SurfaceRepository '${{ github.repository }}' ` + -LookbackDays $lookbackDays ` + -MinSuccessRatePct $minSuccessRatePct ` + -SyncGuardMaxAgeHours $syncGuardMaxAgeHours ` + -OutputPath $reportPath + + - name: Upload ops SLO gate report + if: always() + uses: actions/upload-artifact@v4 + with: + name: ops-slo-gate-report-${{ github.run_id }} + path: ${{ runner.temp }}/ops-slo-gate-report.json + if-no-files-found: error + + - name: Update ops SLO gate issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops SLO Gate Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-slo-gate-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops SLO gate report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Ops SLO gate failed. 
+ + - Run: $env:RUN_URL + - Reason codes: $reasonCodeText + - Message: $($report.message) + - Lookback days: $($report.lookback_days) + - Min success rate pct: $($report.min_success_rate_pct) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close ops SLO gate issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Ops SLO Gate Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'ops-slo-gate-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "ops SLO gate report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Ops SLO gate recovered. 
+ + - Run: $env:RUN_URL + - Message: $($report.message) + - Lookback days: $($report.lookback_days) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 3fab778..30b59fb 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -122,7 +122,7 @@ jobs: path: ${{ runner.temp }}/release-control-plane-report.json if-no-files-found: error - - name: Open or update release control plane incident issue on failure + - name: Update release control plane incident issue on failure if: failure() shell: pwsh env: @@ -148,21 +148,42 @@ jobs: - Repository: $($report.repository) "@ - $existingJson = & gh issue list -R $env:REPOSITORY --state open --search "$title in:title" --json number --limit 1 - if ($LASTEXITCODE -ne 0) { - throw 'Failed to query existing release control plane incident issue.' - } + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body - $existing = @($existingJson | ConvertFrom-Json) - if ($existing.Count -gt 0) { - $number = [string]$existing[0].number - & gh issue comment $number -R $env:REPOSITORY --body $body - if ($LASTEXITCODE -ne 0) { - throw "Failed to append release control plane issue comment to #$number." - } - } else { - & gh issue create -R $env:REPOSITORY --title $title --body $body - if ($LASTEXITCODE -ne 0) { - throw 'Failed to create release control plane incident issue.' 
- } + - name: Close release control plane incident issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Control Plane Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "Release control plane report missing: $reportPath" } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Release control plane recovered. + + - Run: $env:RUN_URL + - Mode: $($report.mode) + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Repository: $($report.repository) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/release-rollback-drill.yml b/.github/workflows/release-rollback-drill.yml new file mode 100644 index 0000000..d667801 --- /dev/null +++ b/.github/workflows/release-rollback-drill.yml @@ -0,0 +1,136 @@ +name: release-rollback-drill + +on: + schedule: + - cron: '55 8 * * *' + workflow_dispatch: + inputs: + channel: + description: Release channel to evaluate rollback readiness for. + required: false + default: canary + type: choice + options: + - canary + - prerelease + - stable + required_history_count: + description: Number of channel releases required to validate previous rollback target. 
+ required: false + default: '2' + type: string + +permissions: + contents: read + issues: write + +jobs: + release-rollback-drill: + name: Release Rollback Drill + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Execute rollback drill + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-rollback-drill-report.json' + + $channel = [string]'${{ inputs.channel }}' + if ([string]::IsNullOrWhiteSpace($channel)) { + $channel = 'canary' + } + + $requiredHistoryText = [string]'${{ inputs.required_history_count }}' + $requiredHistoryCount = 2 + if (-not [string]::IsNullOrWhiteSpace($requiredHistoryText)) { + $parsedRequiredHistory = 0 + if (-not [int]::TryParse($requiredHistoryText, [ref]$parsedRequiredHistory)) { + throw "required_history_count must be an integer. actual='$requiredHistoryText'" + } + $requiredHistoryCount = $parsedRequiredHistory + } + + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseRollbackDrill.ps1 ` + -Repository '${{ github.repository }}' ` + -Channel $channel ` + -RequiredHistoryCount $requiredHistoryCount ` + -OutputPath $reportPath + + - name: Upload rollback drill report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-rollback-drill-report-${{ github.run_id }} + path: ${{ runner.temp }}/release-rollback-drill-report.json + if-no-files-found: error + + - name: Update rollback drill issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Rollback Drill Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-rollback-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release rollback drill report missing: $reportPath" + } + + 
$report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Release rollback drill failed. + + - Run: $env:RUN_URL + - Channel: $($report.channel) + - Reason codes: $reasonCodeText + - Message: $($report.message) + - Candidate count: $($report.candidate_count) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close rollback drill issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Rollback Drill Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-rollback-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release rollback drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Release rollback drill recovered. + + - Run: $env:RUN_URL + - Channel: $($report.channel) + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/AGENTS.md b/AGENTS.md index 5b6cc2f..a4b2851 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -181,6 +181,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref ## Ops Monitoring Policy - `.github/workflows/ops-monitoring.yml` is the authoritative hourly ops snapshot workflow. 
- It must run `scripts/Invoke-OpsMonitoringSnapshot.ps1` and fail with deterministic reason codes when runner or sync-guard health drifts. +- Incident lifecycle automation for ops workflows must run through `scripts/Invoke-OpsIncidentLifecycle.ps1`. +- Failure-path issue behavior: create if missing, reopen if closed, or comment if already open. +- Recovery-path issue behavior: comment and close when the latest matching issue is open. - Ops snapshot reason codes must remain explicit: - `runner_unavailable` - `sync_guard_failed` @@ -215,6 +218,28 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - stable: `80-99` - Promotion must gate on source release integrity (required assets + source commit equals branch head). - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. +- `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Test-OpsSloGate.ps1`. +- SLO gate reason codes must remain explicit: + - `workflow_missing_runs` + - `workflow_failure_detected` + - `workflow_success_rate_below_threshold` + - `sync_guard_missing` + - `sync_guard_stale` + - `slo_gate_runtime_error` +- `.github/workflows/ops-policy-drift-check.yml` must run `scripts/Test-ReleaseControlPlanePolicyDrift.ps1`. +- Policy drift reason codes must remain explicit: + - `manifest_missing` + - `payload_manifest_missing` + - `release_client_missing` + - `release_client_drift` + - `runtime_images_missing` + - `ops_control_plane_policy_missing` + - `policy_drift_runtime_error` +- `.github/workflows/release-rollback-drill.yml` must run `scripts/Invoke-ReleaseRollbackDrill.ps1`. +- Rollback drill reason codes must remain explicit: + - `rollback_candidate_missing` + - `rollback_assets_missing` + - `rollback_drill_runtime_error` - Operational incident handling runbook is `docs/runbooks/release-ops-incident-response.md`. 
## Integration Gate Policy diff --git a/README.md b/README.md index 333658d..b18d0a0 100644 --- a/README.md +++ b/README.md @@ -360,7 +360,11 @@ Control-plane runner health is intentionally decoupled from Docker Desktop parit - `scripts/Invoke-ReleaseControlPlane.ps1` and `scripts/Invoke-OpsAutoRemediation.ps1` call ops monitoring with release-runner labels only (`self-hosted`, `windows`, `self-hosted-windows-lv`). - `ops-monitoring.yml` keeps strict defaults for Docker Desktop Windows gate visibility (`self-hosted`, `windows`, `self-hosted-windows-lv`, `windows-containers`, `user-session`, `cdev-surface-windows-gate`). -Every run uploads `ops-monitoring-report.json`. On failure, automation updates a single tracking issue (`Ops Monitoring Alert`). +Incident lifecycle is deterministic and shared by ops workflows via `scripts/Invoke-OpsIncidentLifecycle.ps1`: +- failure: create/reopen/comment the workflow-specific incident issue +- recovery: comment and close the open incident issue + +Every run uploads `ops-monitoring-report.json`. `canary-smoke-tag-hygiene.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-CanarySmokeTagHygiene.ps1` to keep latest `v0.YYYYMMDD.N` canary smoke tag(s) for a UTC date and delete older tags deterministically. @@ -384,6 +388,33 @@ Control-plane behavior: `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. +`ops-slo-gate.yml` is scheduled daily and supports manual dispatch. 
It runs `scripts/Test-OpsSloGate.ps1` to enforce: +- 7-day lookback by default +- 100% success-rate target for `ops-monitoring`, `ops-autoremediate`, and `release-control-plane` +- max sync-guard success age of 12 hours +- deterministic reason codes on failure: + - `workflow_missing_runs` + - `workflow_failure_detected` + - `workflow_success_rate_below_threshold` + - `sync_guard_missing` + - `sync_guard_stale` + +`ops-policy-drift-check.yml` is scheduled hourly and supports manual dispatch. It runs `scripts/Test-ReleaseControlPlanePolicyDrift.ps1` and fails on: +- root/payload release-client policy drift +- missing runtime image metadata +- missing control-plane policy metadata +- deterministic reason codes on failure: + - `release_client_drift` + - `runtime_images_missing` + - `ops_control_plane_policy_missing` + +`release-rollback-drill.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-ReleaseRollbackDrill.ps1` to validate deterministic rollback readiness: +- channel-scoped latest/previous release candidates +- required release assets for rollback safety (`installer`, `.sha256`, `reproducibility-report.json`, SPDX/SLSA, `release-manifest.json`) +- deterministic reason codes on failure: + - `rollback_candidate_missing` + - `rollback_assets_missing` + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 8e00417..57bb0f5 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -5,6 +5,9 @@ Deterministic operator response for Scope A hardening controls: - runner availability monitoring - cdev-cli fork/upstream sync-guard monitoring - canary smoke tag hygiene +- SLO gate enforcement +- policy drift detection +- rollback drill readiness ## Inputs - Surface repository: 
`LabVIEW-Community-CI-CD/labview-cdev-surface-fork` @@ -16,6 +19,7 @@ Deterministic operator response for Scope A hardening controls: 2. Read `reason_codes`. 3. Execute remediation by reason code. 4. If remediation is automatable, dispatch `ops-autoremediate.yml` first and re-check health. +5. Incident issue lifecycle is automated (`create/reopen/comment` on failure, `comment/close` on recovery) by `scripts/Invoke-OpsIncidentLifecycle.ps1`. Reason code mapping: - `runner_unavailable`: no online self-hosted runner matched required labels. @@ -119,6 +123,38 @@ gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cde -f dry_run=true ``` +## SLO Gate Dispatch +Run strict SLO gate with default 7-day window: + +```powershell +gh workflow run ops-slo-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +Run with explicit thresholds: + +```powershell +gh workflow run ops-slo-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f lookback_days=7 ` + -f min_success_rate_pct=100 ` + -f sync_guard_max_age_hours=12 +``` + +## Policy Drift Check Dispatch +Run control-plane policy drift check: + +```powershell +gh workflow run ops-policy-drift-check.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +## Rollback Drill Dispatch +Run deterministic rollback drill on canary lane: + +```powershell +gh workflow run release-rollback-drill.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f channel=canary ` + -f required_history_count=2 +``` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` diff --git a/scripts/Invoke-OpsIncidentLifecycle.ps1 b/scripts/Invoke-OpsIncidentLifecycle.ps1 new file mode 100644 index 0000000..686e452 --- /dev/null +++ b/scripts/Invoke-OpsIncidentLifecycle.ps1 @@ -0,0 +1,191 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 
'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$IssueTitle, + + [Parameter()] + [ValidateSet('Fail', 'Recover')] + [string]$Mode = 'Fail', + + [Parameter()] + [string]$Body = '', + + [Parameter()] + [string]$RunUrl = '', + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Parse-IssueTimestampUtc { + param([Parameter(Mandatory = $true)][object]$Issue) + + $value = [DateTimeOffset]::MinValue + if ([DateTimeOffset]::TryParse([string]$Issue.updatedAt, [ref]$value)) { + return $value.ToUniversalTime() + } + return [DateTimeOffset]::MinValue +} + +function Resolve-Body { + param( + [Parameter(Mandatory = $true)][string]$LifecycleMode, + [Parameter(Mandatory = $true)][AllowEmptyString()][string]$Text, + [Parameter(Mandatory = $true)][AllowEmptyString()][string]$Url + ) + + if (-not [string]::IsNullOrWhiteSpace($Text)) { + return $Text + } + + if ($LifecycleMode -eq 'Fail') { + return @" +Ops incident detected. + +- Run: $Url +"@ + } + + return @" +Ops incident recovered. 
+ +- Run: $Url +"@ +} + +$report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + repository = $Repository + issue_title = $IssueTitle + mode = $Mode + run_url = $RunUrl + status = 'fail' + action = '' + issue = $null + message = '' +} + +try { + $resolvedBody = Resolve-Body -LifecycleMode $Mode -Text $Body -Url $RunUrl + $issues = @(Invoke-GhJson -Arguments @( + 'issue', 'list', + '-R', $Repository, + '--state', 'all', + '--search', "$IssueTitle in:title", + '--json', 'number,title,state,updatedAt,url', + '--limit', '50' + )) + $matches = @($issues | Where-Object { [string]$_.title -eq $IssueTitle }) + $target = @($matches | Sort-Object { Parse-IssueTimestampUtc -Issue $_ } -Descending | Select-Object -First 1) + + if ($Mode -eq 'Fail') { + if (@($target).Count -eq 0) { + $createOutput = (Invoke-GhText -Arguments @( + 'issue', 'create', + '-R', $Repository, + '--title', $IssueTitle, + '--body', $resolvedBody + )).Trim() + $report.action = 'created' + $report.issue = [ordered]@{ + number = [string]::Empty + state_before = 'missing' + state_after = 'open' + url = $createOutput + } + $report.message = 'Issue created for incident.' + } else { + $issueNumber = [string]$target[0].number + $issueState = [string]$target[0].state + $issueUrl = [string]$target[0].url + + if ($issueState -eq 'CLOSED') { + Invoke-Gh -Arguments @('issue', 'reopen', $issueNumber, '-R', $Repository) + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + $report.action = 'reopened_and_commented' + $report.issue = [ordered]@{ + number = $issueNumber + state_before = 'closed' + state_after = 'open' + url = $issueUrl + } + $report.message = "Closed incident issue reopened and updated (#$issueNumber)." 
+ } else { + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + $report.action = 'commented' + $report.issue = [ordered]@{ + number = $issueNumber + state_before = 'open' + state_after = 'open' + url = $issueUrl + } + $report.message = "Open incident issue updated (#$issueNumber)." + } + } + } else { + if (@($target).Count -eq 0) { + $report.action = 'no_issue_found' + $report.issue = [ordered]@{ + number = [string]::Empty + state_before = 'missing' + state_after = 'missing' + url = [string]::Empty + } + $report.message = 'No historical incident issue found to close.' + } else { + $issueNumber = [string]$target[0].number + $issueState = [string]$target[0].state + $issueUrl = [string]$target[0].url + + if ($issueState -eq 'OPEN') { + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + Invoke-Gh -Arguments @('issue', 'close', $issueNumber, '-R', $Repository) + $report.action = 'commented_and_closed' + $report.issue = [ordered]@{ + number = $issueNumber + state_before = 'open' + state_after = 'closed' + url = $issueUrl + } + $report.message = "Incident issue closed after recovery (#$issueNumber)." + } else { + $report.action = 'already_closed' + $report.issue = [ordered]@{ + number = $issueNumber + state_before = 'closed' + state_after = 'closed' + url = $issueUrl + } + $report.message = "Latest incident issue already closed (#$issueNumber)." 
+ } + } + } + + $report.status = 'pass' +} +catch { + $report.status = 'fail' + $report.action = 'runtime_error' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Invoke-ReleaseRollbackDrill.ps1 b/scripts/Invoke-ReleaseRollbackDrill.ps1 new file mode 100644 index 0000000..a2a9590 --- /dev/null +++ b/scripts/Invoke-ReleaseRollbackDrill.ps1 @@ -0,0 +1,183 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidateSet('stable', 'prerelease', 'canary')] + [string]$Channel = 'canary', + + [Parameter()] + [ValidateRange(2, 100)] + [int]$RequiredHistoryCount = 2, + + [Parameter()] + [ValidateRange(10, 200)] + [int]$ReleaseLimit = 100, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +function Parse-ReleaseTagRecord { + param([Parameter(Mandatory = $true)][string]$TagName) + + $match = [regex]::Match($TagName, '^v0\.(?<date>\d{8})\.(?<sequence>\d+)$') + if (-not $match.Success) { + return $null + } + + $sequence = 0 + if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { + return $null + } + + return [ordered]@{ + tag_name = $TagName + date = [string]$match.Groups['date'].Value + sequence = $sequence + } +} + +function Test-ChannelMatch { + param( + [Parameter(Mandatory = $true)][object]$ReleaseRecord, + [Parameter(Mandatory = $true)][string]$TargetChannel + ) + + $parsed = Parse-ReleaseTagRecord -TagName ([string]$ReleaseRecord.tagName) + if ($null -eq $parsed) { + return $false + } + + $seq = [int]$parsed.sequence + $isPrerelease = [bool]$ReleaseRecord.isPrerelease + switch ($TargetChannel) { + 'canary' { return $isPrerelease -and $seq -ge 1 -and $seq -le 49 } + 'prerelease' { return $isPrerelease -and $seq -ge 50 -and $seq -le 79 } + 'stable' { return (-not $isPrerelease) -and $seq -ge 80 -and $seq -le 99 } + default { return $false } + } +} + +$requiredAssets = @( + 'lvie-cdev-workspace-installer.exe', + 'lvie-cdev-workspace-installer.exe.sha256', + 'reproducibility-report.json', + 'workspace-installer.spdx.json', + 'workspace-installer.slsa.json', + 'release-manifest.json' +) + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + channel = $Channel + required_history_count = $RequiredHistoryCount + status = 'fail' + reason_codes = @() + message = '' + candidate_count = 0 + current = $null + previous = $null + required_assets = $requiredAssets +
asset_checks = @() +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() +$assetChecks = [System.Collections.Generic.List[object]]::new() + +try { + $releases = @(Get-GhReleasesPortable -Repository $Repository -Limit $ReleaseLimit -ExcludeDrafts) + $candidates = @( + $releases | + Where-Object { Test-ChannelMatch -ReleaseRecord $_ -TargetChannel $Channel } | + Sort-Object { + $parsed = Parse-ReleaseTagRecord -TagName ([string]$_.tagName) + "{0}-{1:D3}" -f [string]$parsed.date, [int]$parsed.sequence + } -Descending + ) + + $report.candidate_count = @($candidates).Count + if (@($candidates).Count -lt $RequiredHistoryCount) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'rollback_candidate_missing' + } else { + $current = $candidates[0] + $previous = $candidates[1] + $report.current = [ordered]@{ + tag = [string]$current.tagName + published_at_utc = [string]$current.publishedAt + url = [string]$current.url + } + $report.previous = [ordered]@{ + tag = [string]$previous.tagName + published_at_utc = [string]$previous.publishedAt + url = [string]$previous.url + } + + foreach ($tag in @([string]$current.tagName, [string]$previous.tagName)) { + $release = Invoke-GhJson -Arguments @( + 'release', 'view', + $tag, + '-R', $Repository, + '--json', 'tagName,assets,targetCommitish,isPrerelease,publishedAt,url' + ) + $assetNames = @($release.assets | ForEach-Object { [string]$_.name }) + foreach ($asset in @($requiredAssets)) { + $present = $assetNames -contains $asset + $assetChecks.Add([ordered]@{ + tag = $tag + asset = $asset + present = $present + }) | Out-Null + if (-not $present) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'rollback_assets_missing' + } + } + } + } + + $report.asset_checks = @($assetChecks) + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Release rollback drill passed.' 
+ } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Release rollback drill failed. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + $report.status = 'fail' + $report.reason_codes = @('rollback_drill_runtime_error') + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-OpsSloGate.ps1 b/scripts/Test-OpsSloGate.ps1 new file mode 100644 index 0000000..06fc4b2 --- /dev/null +++ b/scripts/Test-OpsSloGate.ps1 @@ -0,0 +1,190 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SurfaceRepository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SyncGuardRepository = 'LabVIEW-Community-CI-CD/labview-cdev-cli', + + [Parameter()] + [ValidateRange(1, 90)] + [int]$LookbackDays = 7, + + [Parameter()] + [ValidateRange(0, 100)] + [double]$MinSuccessRatePct = 100, + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string[]]$RequiredWorkflows = @( + 'ops-monitoring', + 'ops-autoremediate', + 'release-control-plane' + ), + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + surface_repository = $SurfaceRepository + sync_guard_repository = $SyncGuardRepository + lookback_days = $LookbackDays + min_success_rate_pct = $MinSuccessRatePct + sync_guard_max_age_hours = $SyncGuardMaxAgeHours + required_workflows = @($RequiredWorkflows) + status = 'fail' + reason_codes = @() + message = '' + workflow_evaluations = @() + sync_guard_evaluation = [ordered]@{} + source_slo_report = $null +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() + +try { + $sloScript = Join-Path $PSScriptRoot 'Write-OpsSloReport.ps1' + if (-not (Test-Path -LiteralPath $sloScript -PathType Leaf)) { + throw "required_script_missing: $sloScript" + } + + $scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("ops-slo-gate-" + [Guid]::NewGuid().ToString('N')) + New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + + try { + $sloPath = Join-Path $scratchRoot 'weekly-ops-slo-report.json' + & pwsh -NoProfile -File $sloScript ` + -SurfaceRepository $SurfaceRepository ` + -SyncGuardRepository $SyncGuardRepository ` + -LookbackDays $LookbackDays ` + -OutputPath $sloPath + if ($LASTEXITCODE -ne 0) { + throw "slo_report_generation_failed: exit_code=$LASTEXITCODE" + } + + $sloReport = Get-Content -LiteralPath $sloPath -Raw | ConvertFrom-Json -ErrorAction Stop + $report.source_slo_report = $sloReport + + $workflowEvaluations = [System.Collections.Generic.List[object]]::new() + foreach ($workflowName in @($RequiredWorkflows)) { + $candidate = @($sloReport.workflows | Where-Object { [string]$_.workflow -eq [string]$workflowName } | Select-Object -First 1) 
+ if (@($candidate).Count -ne 1) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'workflow_missing_runs' + [void]$workflowEvaluations.Add([ordered]@{ + workflow = $workflowName + status = 'fail' + reason = 'missing' + detail = 'No SLO record found for required workflow.' + }) + continue + } + + $record = $candidate[0] + $workflowStatus = 'pass' + $workflowReasons = [System.Collections.Generic.List[string]]::new() + if ([int]$record.total_runs -lt 1) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'workflow_missing_runs' + [void]$workflowReasons.Add('missing_runs') + } + if ([int]$record.failure_runs -gt 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'workflow_failure_detected' + [void]$workflowReasons.Add('failure_runs_present') + } + if ([double]$record.success_rate_pct -lt $MinSuccessRatePct) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'workflow_success_rate_below_threshold' + [void]$workflowReasons.Add('success_rate_below_threshold') + } + if (@($workflowReasons).Count -gt 0) { + $workflowStatus = 'fail' + } + + [void]$workflowEvaluations.Add([ordered]@{ + workflow = [string]$record.workflow + status = $workflowStatus + reason_codes = @($workflowReasons) + total_runs = [int]$record.total_runs + completed_runs = [int]$record.completed_runs + success_runs = [int]$record.success_runs + failure_runs = [int]$record.failure_runs + success_rate_pct = [double]$record.success_rate_pct + }) + } + $report.workflow_evaluations = @($workflowEvaluations) + + $syncGuardEvaluation = [ordered]@{ + status = 'pass' + reason_codes = @() + latest_success_age_hours = $sloReport.sync_guard.latest_success_age_hours + total_runs = $sloReport.sync_guard.total_runs + } + $syncGuardReasons = [System.Collections.Generic.List[string]]::new() + if ($null -eq $sloReport.sync_guard.latest_success_run) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_missing' + [void]$syncGuardReasons.Add('latest_success_missing') + } elseif ($null -ne 
$sloReport.sync_guard.latest_success_age_hours -and [double]$sloReport.sync_guard.latest_success_age_hours -gt $SyncGuardMaxAgeHours) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'sync_guard_stale' + [void]$syncGuardReasons.Add('latest_success_stale') + } + + if (@($syncGuardReasons).Count -gt 0) { + $syncGuardEvaluation.status = 'fail' + $syncGuardEvaluation.reason_codes = @($syncGuardReasons) + } + $report.sync_guard_evaluation = $syncGuardEvaluation + + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Ops SLO gate passed.' + } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Ops SLO gate failed. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } + } + finally { + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } + } +} +catch { + $report.status = 'fail' + $report.reason_codes = @('slo_gate_runtime_error') + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 61be9fd..38bbd08 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -168,6 +168,18 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 
'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_exists' -Passed ($null -ne $releaseClient.ops_control_plane_policy) -Detail 'installer_contract.release_client.ops_control_plane_policy' + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_release_limit' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) } $requiredSchemaFields = @( diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 3d2f9b3..efcc3b9 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ 
b/scripts/Test-ReleaseClientContracts.ps1 @@ -91,6 +91,19 @@ if ($null -ne $releaseClient) { Add-Check -Name 'runtime_images_ops_runtime_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) Add-Check -Name 'runtime_images_ops_runtime_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) Add-Check -Name 'runtime_images_ops_runtime_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) + Add-Check -Name 'ops_control_plane_policy_exists' -Passed ($null -ne $releaseClient.ops_control_plane_policy) -Detail 'installer_contract.release_client.ops_control_plane_policy' + Add-Check -Name 'ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) + Add-Check -Name 'ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) + Add-Check -Name 'ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) + Add-Check -Name 'ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Name 'ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Name 'ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Name 'ops_policy_incident_auto_close_on_recovery' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) + Add-Check -Name 'ops_policy_incident_reopen_on_regression' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Name 'ops_policy_incident_title_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Control Plane Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Name 'ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) + Add-Check -Name 'ops_policy_rollback_required_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) + Add-Check -Name 'ops_policy_rollback_release_limit' -Passed 
([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) if ([DateTime]::Parse([string]$releaseClient.signature_policy.dual_mode_start_utc) -gt [DateTime]::Parse([string]$releaseClient.signature_policy.canary_enforce_utc)) { Add-Check -Name 'signature_date_order_dual_before_canary' -Passed $false -Detail 'dual_mode_start_utc must be <= canary_enforce_utc' diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 new file mode 100644 index 0000000..82789da --- /dev/null +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -0,0 +1,122 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [string]$ManifestPath = 'workspace-governance.json', + + [Parameter()] + [string]$PayloadManifestPath = 'workspace-governance-payload/workspace-governance/workspace-governance.json', + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + manifest_path = $ManifestPath + payload_manifest_path = $PayloadManifestPath + status = 'fail' + reason_codes = @() + message = '' + checks = @() +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() +$checks = [System.Collections.Generic.List[object]]::new() + +try { + $resolvedManifestPath = [System.IO.Path]::GetFullPath($ManifestPath) + $resolvedPayloadManifestPath = [System.IO.Path]::GetFullPath($PayloadManifestPath) + $report.manifest_path = $resolvedManifestPath + $report.payload_manifest_path = $resolvedPayloadManifestPath + + if (-not (Test-Path -LiteralPath $resolvedManifestPath -PathType Leaf)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'manifest_missing' + } + if (-not (Test-Path -LiteralPath $resolvedPayloadManifestPath -PathType Leaf)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'payload_manifest_missing' + } + + if ($reasonCodes.Count -eq 0) { + $manifest = Get-Content -LiteralPath $resolvedManifestPath -Raw | ConvertFrom-Json -Depth 100 + $payloadManifest = Get-Content -LiteralPath $resolvedPayloadManifestPath -Raw | ConvertFrom-Json -Depth 100 + + $releaseClient = $manifest.installer_contract.release_client + $payloadReleaseClient = $payloadManifest.installer_contract.release_client + + if ($null -eq $releaseClient -or $null -eq $payloadReleaseClient) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'release_client_missing' + } else { + $releaseClientJson = $releaseClient | ConvertTo-Json -Depth 100 + $payloadReleaseClientJson = $payloadReleaseClient | ConvertTo-Json -Depth 100 + $matches = 
[string]::Equals($releaseClientJson, $payloadReleaseClientJson, [System.StringComparison]::Ordinal) + $checks.Add([ordered]@{ + check = 'release_client_equivalent' + passed = $matches + }) | Out-Null + if (-not $matches) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'release_client_drift' + } + + $runtimeImagesPresent = ($null -ne $releaseClient.runtime_images) + $checks.Add([ordered]@{ + check = 'release_client_runtime_images_present' + passed = $runtimeImagesPresent + }) | Out-Null + if (-not $runtimeImagesPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'runtime_images_missing' + } + + $opsPolicyPresent = ($null -ne $releaseClient.ops_control_plane_policy) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_present' + passed = $opsPolicyPresent + }) | Out-Null + if (-not $opsPolicyPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_policy_missing' + } + } + } + + $report.checks = @($checks) + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Release control-plane policy drift check passed.' + } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Release control-plane policy drift check failed. 
reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + $report.status = 'fail' + $report.reason_codes = @('policy_drift_runtime_error') + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 index eba099e..fb00ada 100644 --- a/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/OpsAutoRemediationWorkflowContract.Tests.ps1 @@ -29,9 +29,11 @@ Describe 'Ops auto-remediation workflow contract' { It 'executes deterministic remediation and reports incidents' { $script:workflowContent | Should -Match 'Invoke-OpsAutoRemediation\.ps1' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'ops-autoremediate-report\.json' $script:workflowContent | Should -Match 'Ops Auto-Remediation Alert' - $script:workflowContent | Should -Match 'gh issue' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' } It 'targets sync-guard drift and classifies manual runner intervention' { diff --git a/tests/OpsIncidentLifecycleContract.Tests.ps1 b/tests/OpsIncidentLifecycleContract.Tests.ps1 new file mode 100644 index 0000000..8061e1b --- /dev/null +++ b/tests/OpsIncidentLifecycleContract.Tests.ps1 @@ -0,0 +1,38 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops incident lifecycle contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:scriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsIncidentLifecycle.ps1' + + if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) { + throw "Ops incident lifecycle script missing: $script:scriptPath" + } + + 
$script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw + } + + It 'defines deterministic fail and recover modes' { + $script:scriptContent | Should -Match "ValidateSet\('Fail', 'Recover'\)" + $script:scriptContent | Should -Match 'issue_title' + $script:scriptContent | Should -Match 'mode' + $script:scriptContent | Should -Match 'action' + } + + It 'handles create comment reopen close issue transitions' { + $script:scriptContent | Should -Match "'issue', 'list'" + $script:scriptContent | Should -Match "'issue', 'create'" + $script:scriptContent | Should -Match "'issue', 'comment'" + $script:scriptContent | Should -Match "'issue', 'reopen'" + $script:scriptContent | Should -Match "'issue', 'close'" + } + + It 'emits machine-readable report output' { + $script:scriptContent | Should -Match 'schema_version' + $script:scriptContent | Should -Match 'Write-WorkflowOpsReport' + $script:scriptContent | Should -Match 'runtime_error' + } +} diff --git a/tests/OpsMonitoringWorkflowContract.Tests.ps1 b/tests/OpsMonitoringWorkflowContract.Tests.ps1 index 33d26ec..a2259ef 100644 --- a/tests/OpsMonitoringWorkflowContract.Tests.ps1 +++ b/tests/OpsMonitoringWorkflowContract.Tests.ps1 @@ -28,9 +28,12 @@ Describe 'Ops monitoring workflow contract' { It 'runs snapshot script and uploads deterministic report artifact' { $script:workflowContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'ops-monitoring-report\.json' $script:workflowContent | Should -Match 'upload-artifact' $script:workflowContent | Should -Match 'Ops Monitoring Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' } It 'checks runner and sync-guard health with deterministic reason codes' { diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 new file mode 
100644 index 0000000..04cea5f --- /dev/null +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -0,0 +1,42 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops policy drift workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Test-ReleaseControlPlanePolicyDrift.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Ops policy drift contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + } + + It 'runs policy drift validation and incident lifecycle handling' { + $script:workflowContent | Should -Match 'Test-ReleaseControlPlanePolicyDrift\.ps1' + $script:workflowContent | Should -Match 'ops-policy-drift-report\.json' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match 'Ops Policy Drift Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + } + + It 'verifies release-client policy equivalence and required ops metadata' { + $script:runtimeContent | Should -Match 'release_client_equivalent' + $script:runtimeContent | Should -Match 'release_client_drift' + $script:runtimeContent | Should -Match 'runtime_images_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' + } +} diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 
b/tests/OpsSloGateWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..1a66cb6 --- /dev/null +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -0,0 +1,50 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Ops SLO gate workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Test-OpsSloGate.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Ops SLO gate contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with deterministic SLO inputs' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'lookback_days' + $script:workflowContent | Should -Match 'min_success_rate_pct' + $script:workflowContent | Should -Match 'sync_guard_max_age_hours' + } + + It 'runs SLO gate runtime, uploads report, and manages incident lifecycle' { + $script:workflowContent | Should -Match 'Test-OpsSloGate\.ps1' + $script:workflowContent | Should -Match 'ops-slo-gate-report\.json' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match 'Ops SLO Gate Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + } + + It 'evaluates workflow and sync-guard SLO conditions with deterministic reason codes' { + $script:runtimeContent | Should -Match 'Write-OpsSloReport\.ps1' + $script:runtimeContent | Should -Match 'ops-monitoring' + 
$script:runtimeContent | Should -Match 'ops-autoremediate' + $script:runtimeContent | Should -Match 'release-control-plane' + $script:runtimeContent | Should -Match 'workflow_missing_runs' + $script:runtimeContent | Should -Match 'workflow_failure_detected' + $script:runtimeContent | Should -Match 'workflow_success_rate_below_threshold' + $script:runtimeContent | Should -Match 'sync_guard_stale' + $script:runtimeContent | Should -Match 'sync_guard_missing' + } +} diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 38a6527..7822868 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -52,6 +52,20 @@ Describe 'Release client policy contract' { $releaseClient.runtime_images.ops_runtime.repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops' $releaseClient.runtime_images.ops_runtime.base_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' $releaseClient.runtime_images.ops_runtime.base_digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $releaseClient.ops_control_plane_policy.slo_gate.lookback_days | Should -Be 7 + $releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct | Should -Be 100 + $releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours | Should -Be 12 + @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-monitoring' + @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-autoremediate' + @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'release-control-plane' + $releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery | Should -BeTrue + $releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression | Should -BeTrue + 
@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops SLO Gate Alert' + @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' + @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' + $releaseClient.ops_control_plane_policy.rollback_drill.channel | Should -Be 'canary' + $releaseClient.ops_control_plane_policy.rollback_drill.required_history_count | Should -Be 2 + $releaseClient.ops_control_plane_policy.rollback_drill.release_limit | Should -Be 100 ($script:payloadManifest | ConvertTo-Json -Depth 100) | Should -Be ($script:manifest | ConvertTo-Json -Depth 100) } @@ -66,6 +80,9 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'runtime_images_exists' $script:policyScriptContent | Should -Match 'runtime_images_cdev_cli_runtime_canonical_repository' $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' + $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' + $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' + $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' } It 'has parse-safe PowerShell syntax' { diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 51f272d..74c2a42 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -36,8 +36,11 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' $script:workflowContent | Should -Match 'hosted_runner_required' $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 
'release-control-plane-report\.json' $script:workflowContent | Should -Match 'Release Control Plane Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' $script:workflowContent | Should -Match 'actions:\s*write' $script:workflowContent | Should -Match 'contents:\s*write' } diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..4257e5a --- /dev/null +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -0,0 +1,48 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release rollback drill workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Release rollback drill contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with channel and history controls' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'channel:' + $script:workflowContent | Should -Match 'required_history_count' + } + + It 'runs rollback drill runtime, uploads report, and manages incident lifecycle' { + $script:workflowContent | Should -Match 'Invoke-ReleaseRollbackDrill\.ps1' + $script:workflowContent | Should -Match 'release-rollback-drill-report\.json' + $script:workflowContent | Should -Match 
'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match 'Release Rollback Drill Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + } + + It 'validates channel-specific release history and required rollback assets' { + $script:runtimeContent | Should -Match "ValidateSet\('stable', 'prerelease', 'canary'\)" + $script:runtimeContent | Should -Match 'rollback_candidate_missing' + $script:runtimeContent | Should -Match 'rollback_assets_missing' + $script:runtimeContent | Should -Match 'lvie-cdev-workspace-installer\.exe' + $script:runtimeContent | Should -Match 'release-manifest\.json' + $script:runtimeContent | Should -Match 'workspace-installer\.spdx\.json' + $script:runtimeContent | Should -Match 'workspace-installer\.slsa\.json' + $script:runtimeContent | Should -Match 'reproducibility-report\.json' + } +} diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index 5d4b76b..809505c 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -26,17 +26,27 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'fork-upstream-sync-guard' $script:runbookContent | Should -Match 'Invoke-ControlledForkForceAlign\.ps1' $script:runbookContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' + $script:runbookContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:runbookContent | Should -Match 'ops-slo-gate\.yml' + $script:runbookContent | Should -Match 'ops-policy-drift-check\.yml' + $script:runbookContent | Should -Match 'release-rollback-drill\.yml' $script:runbookContent | Should -Match '20260226' } It 'keeps README and AGENTS aligned to Scope A workflows' { $script:readmeContent | Should -Match 'ops-monitoring\.yml' $script:readmeContent | Should -Match 'canary-smoke-tag-hygiene\.yml' + $script:readmeContent | Should -Match 'ops-slo-gate\.yml' + 
$script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' + $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-ops-incident-response\.md' $script:agentsContent | Should -Match 'Ops Monitoring Policy' $script:agentsContent | Should -Match 'runner_unavailable' $script:agentsContent | Should -Match 'sync_guard_failed' $script:agentsContent | Should -Match 'canary-smoke-tag-hygiene\.yml' + $script:agentsContent | Should -Match 'ops-slo-gate\.yml' + $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' + $script:agentsContent | Should -Match 'release-rollback-drill\.yml' } } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 2e803cd..98a8f9b 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -30,6 +30,10 @@ Describe 'Workspace surface contract' { $script:testProvenanceScriptPath = Join-Path $script:repoRoot 'scripts/Test-ProvenanceContracts.ps1' $script:installFromReleaseScriptPath = Join-Path $script:repoRoot 'scripts/Install-WorkspaceInstallerFromRelease.ps1' $script:testReleaseClientContractsScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseClientContracts.ps1' + $script:opsIncidentLifecycleScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsIncidentLifecycle.ps1' + $script:opsSloGateScriptPath = Join-Path $script:repoRoot 'scripts/Test-OpsSloGate.ps1' + $script:opsPolicyDriftScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseControlPlanePolicyDrift.ps1' + $script:rollbackDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' $script:windowsContainerNsisSelfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' $script:windowsContainerNsisDockerfilePath = Join-Path 
$script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' @@ -45,6 +49,9 @@ Describe 'Workspace surface contract' { $script:releaseCoreWorkflowPath = Join-Path $script:repoRoot '.github/workflows/_release-workspace-installer-core.yml' $script:releaseWithGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-with-windows-gate.yml' $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' + $script:opsSloGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' + $script:opsPolicyDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' + $script:rollbackDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' $script:linuxNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-linux-nsis-parity-image.yml' $script:windowsNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-windows-nsis-parity-image.yml' $script:windowsImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/windows-labview-image-gate.yml' @@ -88,6 +95,10 @@ Describe 'Workspace surface contract' { $script:testProvenanceScriptPath, $script:installFromReleaseScriptPath, $script:testReleaseClientContractsScriptPath, + $script:opsIncidentLifecycleScriptPath, + $script:opsSloGateScriptPath, + $script:opsPolicyDriftScriptPath, + $script:rollbackDrillScriptPath, $script:dockerLinuxIterationScriptPath, $script:windowsContainerNsisSelfTestScriptPath, $script:windowsContainerNsisDockerfilePath, @@ -103,6 +114,9 @@ Describe 'Workspace surface contract' { $script:releaseCoreWorkflowPath, $script:releaseWithGateWorkflowPath, $script:canaryWorkflowPath, + $script:opsSloGateWorkflowPath, + $script:opsPolicyDriftWorkflowPath, + $script:rollbackDrillWorkflowPath, $script:linuxNsisParityImagePublishWorkflowPath, $script:windowsNsisParityImagePublishWorkflowPath, 
$script:windowsImageGateWorkflowPath, @@ -253,6 +267,20 @@ Describe 'Workspace surface contract' { $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops' $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.base_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' $script:manifest.installer_contract.release_client.runtime_images.ops_runtime.base_digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.lookback_days | Should -Be 7 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.min_success_rate_pct | Should -Be 100 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours | Should -Be 12 + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.reopen_on_regression | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops 
Policy Drift Alert') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.channel | Should -Be 'canary' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.required_history_count | Should -Be 2 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.release_limit | Should -Be 100 foreach ($repo in @($script:manifest.managed_repos)) { $repo.PSObject.Properties.Name | Should -Contain 'required_gh_repo' $repo.PSObject.Properties.Name | Should -Contain 'default_branch' @@ -334,6 +362,14 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'svelderrainruiz/labview-cdev-cli' $script:agentsContent | Should -Match 'LabVIEW-Community-CI-CD/labview-cdev-cli' $script:agentsContent | Should -Match 'runtime_images' + $script:agentsContent | Should -Match 'ops_control_plane_policy' + $script:agentsContent | Should -Match 'ops-slo-gate\.yml' + $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' + $script:agentsContent | Should -Match 'release-rollback-drill\.yml' + $script:agentsContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:agentsContent | Should -Match 'workflow_failure_detected' + $script:agentsContent | Should -Match 'release_client_drift' + $script:agentsContent | Should -Match 'rollback_candidate_missing' $script:agentsContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' $script:agentsContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' $script:agentsContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' @@ -361,6 +397,13 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'workspace-release-state\.json' 
$script:readmeContent | Should -Match 'workspace-release-client-latest\.json' $script:readmeContent | Should -Match 'runtime_images' + $script:readmeContent | Should -Match 'ops-slo-gate\.yml' + $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' + $script:readmeContent | Should -Match 'release-rollback-drill\.yml' + $script:readmeContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:readmeContent | Should -Match 'workflow_failure_detected' + $script:readmeContent | Should -Match 'release_client_drift' + $script:readmeContent | Should -Match 'rollback_candidate_missing' $script:readmeContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' $script:readmeContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' $script:readmeContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' @@ -400,6 +443,10 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'CiWorkflowReliabilityContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'WorkspaceShaRefreshPrContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'WorkspaceManifestPinRefreshScript\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'OpsIncidentLifecycleContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'OpsSloGateWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'OpsPolicyDriftWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'ReleaseRollbackDrillWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxLabviewImageGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxContainerNsisParityContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxNsisParityImagePublishWorkflowContract\.Tests\.ps1' diff --git a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 
b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 index 61be9fd..38bbd08 100644 --- a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 +++ b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 @@ -168,6 +168,18 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops') -Detail ([string]$releaseClient.runtime_images.ops_runtime.repository) Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_exists' -Passed ($null -ne $releaseClient.ops_control_plane_policy) -Detail 'installer_contract.release_client.ops_control_plane_policy' + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) + Add-Check 
-Scope 'manifest' -Name 'release_client_ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail 
([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_release_limit' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) } $requiredSchemaFields = @( diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 4723e75..0cb57e3 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -253,6 +253,35 @@ "base_digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" } }, + "ops_control_plane_policy": { + "slo_gate": { + "lookback_days": 7, + "min_success_rate_pct": 100, + "max_sync_guard_age_hours": 12, + "required_workflows": [ + "ops-monitoring", + "ops-autoremediate", + "release-control-plane" + ] + }, + "incident_lifecycle": { + "auto_close_on_recovery": true, + "reopen_on_regression": true, + "titles": [ + "Ops Monitoring Alert", + "Ops Auto-Remediation Alert", + "Release Control Plane Alert", + "Ops SLO Gate Alert", + "Ops Policy Drift Alert", + "Release Rollback Drill Alert" + ] + }, + "rollback_drill": { + "channel": "canary", + "required_history_count": 2, + "release_limit": 100 + } + }, "cdev_cli_sync": { "primary_repo": "svelderrainruiz/labview-cdev-cli", "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", diff --git a/workspace-governance.json b/workspace-governance.json index 4723e75..0cb57e3 100644 --- 
a/workspace-governance.json +++ b/workspace-governance.json @@ -253,6 +253,35 @@ "base_digest": "sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423" } }, + "ops_control_plane_policy": { + "slo_gate": { + "lookback_days": 7, + "min_success_rate_pct": 100, + "max_sync_guard_age_hours": 12, + "required_workflows": [ + "ops-monitoring", + "ops-autoremediate", + "release-control-plane" + ] + }, + "incident_lifecycle": { + "auto_close_on_recovery": true, + "reopen_on_regression": true, + "titles": [ + "Ops Monitoring Alert", + "Ops Auto-Remediation Alert", + "Release Control Plane Alert", + "Ops SLO Gate Alert", + "Ops Policy Drift Alert", + "Release Rollback Drill Alert" + ] + }, + "rollback_drill": { + "channel": "canary", + "required_history_count": 2, + "release_limit": 100 + } + }, "cdev_cli_sync": { "primary_repo": "svelderrainruiz/labview-cdev-cli", "mirror_repo": "LabVIEW-Community-CI-CD/labview-cdev-cli", From e4b58c0ef1b3c17028e1a1d3d41d75c328e5c3c8 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:22:22 -0800 Subject: [PATCH 21/60] Fix Scope A reason-code binding for empty list paths --- scripts/Invoke-OpsMonitoringSnapshot.ps1 | 2 +- scripts/Invoke-ReleaseRollbackDrill.ps1 | 2 +- scripts/Test-OpsSloGate.ps1 | 2 +- scripts/Test-ReleaseControlPlanePolicyDrift.ps1 | 2 +- tests/OpsMonitoringWorkflowContract.Tests.ps1 | 1 + tests/OpsPolicyDriftWorkflowContract.Tests.ps1 | 1 + tests/OpsSloGateWorkflowContract.Tests.ps1 | 1 + tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 | 1 + 8 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/Invoke-OpsMonitoringSnapshot.ps1 b/scripts/Invoke-OpsMonitoringSnapshot.ps1 index e55a83d..cca1566 100644 --- a/scripts/Invoke-OpsMonitoringSnapshot.ps1 +++ b/scripts/Invoke-OpsMonitoringSnapshot.ps1 @@ -69,7 +69,7 @@ function Convert-RunRecord { function Add-ReasonCode { param( - [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + 
[Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, [Parameter(Mandatory = $true)][string]$ReasonCode ) diff --git a/scripts/Invoke-ReleaseRollbackDrill.ps1 b/scripts/Invoke-ReleaseRollbackDrill.ps1 index a2a9590..f7b8a76 100644 --- a/scripts/Invoke-ReleaseRollbackDrill.ps1 +++ b/scripts/Invoke-ReleaseRollbackDrill.ps1 @@ -28,7 +28,7 @@ $ErrorActionPreference = 'Stop' function Add-ReasonCode { param( - [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, [Parameter(Mandatory = $true)][string]$ReasonCode ) diff --git a/scripts/Test-OpsSloGate.ps1 b/scripts/Test-OpsSloGate.ps1 index 06fc4b2..7214273 100644 --- a/scripts/Test-OpsSloGate.ps1 +++ b/scripts/Test-OpsSloGate.ps1 @@ -40,7 +40,7 @@ $ErrorActionPreference = 'Stop' function Add-ReasonCode { param( - [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, [Parameter(Mandatory = $true)][string]$ReasonCode ) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index 82789da..30b79eb 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -18,7 +18,7 @@ $ErrorActionPreference = 'Stop' function Add-ReasonCode { param( - [Parameter(Mandatory = $true)][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, [Parameter(Mandatory = $true)][string]$ReasonCode ) diff --git a/tests/OpsMonitoringWorkflowContract.Tests.ps1 b/tests/OpsMonitoringWorkflowContract.Tests.ps1 index a2259ef..65eb0eb 100644 --- a/tests/OpsMonitoringWorkflowContract.Tests.ps1 +++ 
b/tests/OpsMonitoringWorkflowContract.Tests.ps1 @@ -39,6 +39,7 @@ Describe 'Ops monitoring workflow contract' { It 'checks runner and sync-guard health with deterministic reason codes' { $script:scriptContent | Should -Match 'repos/\$SurfaceRepository/actions/runners\?per_page=100' $script:scriptContent | Should -Match 'Get-GhWorkflowRunsPortable' + $script:scriptContent | Should -Match 'AllowEmptyCollection' $script:scriptContent | Should -Match 'runner_unavailable' $script:scriptContent | Should -Match 'runner_visibility_unavailable' $script:scriptContent | Should -Match 'sync_guard_failed' diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 04cea5f..b21fff1 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -34,6 +34,7 @@ Describe 'Ops policy drift workflow contract' { } It 'verifies release-client policy equivalence and required ops metadata' { + $script:runtimeContent | Should -Match 'AllowEmptyCollection' $script:runtimeContent | Should -Match 'release_client_equivalent' $script:runtimeContent | Should -Match 'release_client_drift' $script:runtimeContent | Should -Match 'runtime_images_missing' diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 b/tests/OpsSloGateWorkflowContract.Tests.ps1 index 1a66cb6..97324bd 100644 --- a/tests/OpsSloGateWorkflowContract.Tests.ps1 +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -38,6 +38,7 @@ Describe 'Ops SLO gate workflow contract' { It 'evaluates workflow and sync-guard SLO conditions with deterministic reason codes' { $script:runtimeContent | Should -Match 'Write-OpsSloReport\.ps1' + $script:runtimeContent | Should -Match 'AllowEmptyCollection' $script:runtimeContent | Should -Match 'ops-monitoring' $script:runtimeContent | Should -Match 'ops-autoremediate' $script:runtimeContent | Should -Match 'release-control-plane' diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 
b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 index 4257e5a..3eda429 100644 --- a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -37,6 +37,7 @@ Describe 'Release rollback drill workflow contract' { It 'validates channel-specific release history and required rollback assets' { $script:runtimeContent | Should -Match "ValidateSet\('stable', 'prerelease', 'canary'\)" + $script:runtimeContent | Should -Match 'AllowEmptyCollection' $script:runtimeContent | Should -Match 'rollback_candidate_missing' $script:runtimeContent | Should -Match 'rollback_assets_missing' $script:runtimeContent | Should -Match 'lvie-cdev-workspace-installer\.exe' From 6002cd206d0b28289bb49135da5edd3aa2d8e845 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:41:34 -0800 Subject: [PATCH 22/60] Add Scope 2 bounded self-healing for SLO and rollback gates --- .github/workflows/ops-slo-gate.yml | 64 ++- .github/workflows/release-rollback-drill.yml | 70 +++- AGENTS.md | 24 +- README.md | 32 +- .../runbooks/release-ops-incident-response.md | 16 + scripts/Invoke-OpsSloSelfHealing.ps1 | 286 +++++++++++++ scripts/Invoke-RollbackDrillSelfHealing.ps1 | 390 ++++++++++++++++++ scripts/Test-PolicyContracts.ps1 | 11 + scripts/Test-ReleaseClientContracts.ps1 | 11 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 9 + .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 1 + tests/OpsSloGateWorkflowContract.Tests.ps1 | 24 +- tests/ReleaseClientPolicyContract.Tests.ps1 | 13 + ...aseRollbackDrillWorkflowContract.Tests.ps1 | 28 +- tests/ScopeAOpsRunbookContract.Tests.ps1 | 5 + tests/WorkspaceSurfaceContract.Tests.ps1 | 21 + .../scripts/Test-PolicyContracts.ps1 | 11 + .../workspace-governance.json | 17 + workspace-governance.json | 17 + 19 files changed, 1018 insertions(+), 32 deletions(-) create mode 100644 scripts/Invoke-OpsSloSelfHealing.ps1 create mode 100644 scripts/Invoke-RollbackDrillSelfHealing.ps1 diff --git 
a/.github/workflows/ops-slo-gate.yml b/.github/workflows/ops-slo-gate.yml index bd77c40..7760577 100644 --- a/.github/workflows/ops-slo-gate.yml +++ b/.github/workflows/ops-slo-gate.yml @@ -20,10 +20,25 @@ on: required: false default: '12' type: string + auto_self_heal: + description: Enable bounded self-healing when SLO gate fails. + required: false + default: true + type: boolean + self_heal_max_attempts: + description: Maximum bounded self-healing attempts. + required: false + default: '1' + type: string + self_heal_watch_timeout_minutes: + description: Timeout minutes for each remediation workflow watch. + required: false + default: '45' + type: string permissions: contents: read - actions: read + actions: write issues: write jobs: @@ -34,7 +49,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Evaluate ops SLO gate + - name: Evaluate ops SLO gate with bounded self-healing shell: pwsh env: GH_TOKEN: ${{ github.token }} @@ -72,11 +87,40 @@ jobs: $syncGuardMaxAgeHours = $parsedMaxAge } - & pwsh -NoProfile -File ./scripts/Test-OpsSloGate.ps1 ` + $autoSelfHealText = [string]'${{ inputs.auto_self_heal }}' + $autoSelfHeal = $true + if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { + $autoSelfHeal = [System.Convert]::ToBoolean($autoSelfHealText) + } + + $selfHealMaxAttemptsText = [string]'${{ inputs.self_heal_max_attempts }}' + $selfHealMaxAttempts = 1 + if (-not [string]::IsNullOrWhiteSpace($selfHealMaxAttemptsText)) { + $parsedMaxAttempts = 0 + if (-not [int]::TryParse($selfHealMaxAttemptsText, [ref]$parsedMaxAttempts)) { + throw "self_heal_max_attempts must be an integer. 
actual='$selfHealMaxAttemptsText'" + } + $selfHealMaxAttempts = $parsedMaxAttempts + } + + $selfHealWatchTimeoutText = [string]'${{ inputs.self_heal_watch_timeout_minutes }}' + $selfHealWatchTimeout = 45 + if (-not [string]::IsNullOrWhiteSpace($selfHealWatchTimeoutText)) { + $parsedWatchTimeout = 0 + if (-not [int]::TryParse($selfHealWatchTimeoutText, [ref]$parsedWatchTimeout)) { + throw "self_heal_watch_timeout_minutes must be an integer. actual='$selfHealWatchTimeoutText'" + } + $selfHealWatchTimeout = $parsedWatchTimeout + } + + & pwsh -NoProfile -File ./scripts/Invoke-OpsSloSelfHealing.ps1 ` -SurfaceRepository '${{ github.repository }}' ` -LookbackDays $lookbackDays ` -MinSuccessRatePct $minSuccessRatePct ` -SyncGuardMaxAgeHours $syncGuardMaxAgeHours ` + -AutoRemediate:$autoSelfHeal ` + -MaxAttempts $selfHealMaxAttempts ` + -WatchTimeoutMinutes $selfHealWatchTimeout ` -OutputPath $reportPath - name: Upload ops SLO gate report @@ -103,16 +147,20 @@ jobs: } $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop - $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) - $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $finalReasonCodes = @($report.final_report.reason_codes | ForEach-Object { [string]$_ }) + $finalReasonCodeText = if ($finalReasonCodes.Count -gt 0) { [string]::Join(',', $finalReasonCodes) } else { 'unknown' } + $attemptCount = @($report.remediation_attempts).Count $body = @" Ops SLO gate failed. 
- Run: $env:RUN_URL - - Reason codes: $reasonCodeText + - Reason code: $($report.reason_code) + - Final reason codes: $finalReasonCodeText - Message: $($report.message) - Lookback days: $($report.lookback_days) - Min success rate pct: $($report.min_success_rate_pct) + - Auto remediate: $($report.auto_remediate) + - Remediation attempts: $attemptCount "@ & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` @@ -138,12 +186,16 @@ jobs: } $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptCount = @($report.remediation_attempts).Count $body = @" Ops SLO gate recovered. - Run: $env:RUN_URL + - Reason code: $($report.reason_code) - Message: $($report.message) - Lookback days: $($report.lookback_days) + - Auto remediated: $($report.reason_code -eq 'remediated') + - Remediation attempts: $attemptCount "@ & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` diff --git a/.github/workflows/release-rollback-drill.yml b/.github/workflows/release-rollback-drill.yml index d667801..b9540d9 100644 --- a/.github/workflows/release-rollback-drill.yml +++ b/.github/workflows/release-rollback-drill.yml @@ -19,9 +19,25 @@ on: required: false default: '2' type: string + auto_self_heal: + description: Enable bounded self-healing when rollback readiness fails. + required: false + default: true + type: boolean + self_heal_max_attempts: + description: Maximum bounded self-healing attempts. + required: false + default: '1' + type: string + self_heal_watch_timeout_minutes: + description: Timeout minutes for each release workflow watch. 
+ required: false + default: '120' + type: string permissions: contents: read + actions: write issues: write jobs: @@ -32,7 +48,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Execute rollback drill + - name: Execute rollback drill with bounded self-healing shell: pwsh env: GH_TOKEN: ${{ github.token }} @@ -55,10 +71,40 @@ jobs: $requiredHistoryCount = $parsedRequiredHistory } - & pwsh -NoProfile -File ./scripts/Invoke-ReleaseRollbackDrill.ps1 ` + $autoSelfHealText = [string]'${{ inputs.auto_self_heal }}' + $autoSelfHeal = $true + if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { + $autoSelfHeal = [System.Convert]::ToBoolean($autoSelfHealText) + } + + $selfHealMaxAttemptsText = [string]'${{ inputs.self_heal_max_attempts }}' + $selfHealMaxAttempts = 1 + if (-not [string]::IsNullOrWhiteSpace($selfHealMaxAttemptsText)) { + $parsedMaxAttempts = 0 + if (-not [int]::TryParse($selfHealMaxAttemptsText, [ref]$parsedMaxAttempts)) { + throw "self_heal_max_attempts must be an integer. actual='$selfHealMaxAttemptsText'" + } + $selfHealMaxAttempts = $parsedMaxAttempts + } + + $selfHealWatchTimeoutText = [string]'${{ inputs.self_heal_watch_timeout_minutes }}' + $selfHealWatchTimeout = 120 + if (-not [string]::IsNullOrWhiteSpace($selfHealWatchTimeoutText)) { + $parsedWatchTimeout = 0 + if (-not [int]::TryParse($selfHealWatchTimeoutText, [ref]$parsedWatchTimeout)) { + throw "self_heal_watch_timeout_minutes must be an integer. 
actual='$selfHealWatchTimeoutText'" + } + $selfHealWatchTimeout = $parsedWatchTimeout + } + + & pwsh -NoProfile -File ./scripts/Invoke-RollbackDrillSelfHealing.ps1 ` -Repository '${{ github.repository }}' ` + -Branch 'main' ` -Channel $channel ` -RequiredHistoryCount $requiredHistoryCount ` + -AutoRemediate:$autoSelfHeal ` + -MaxAttempts $selfHealMaxAttempts ` + -WatchTimeoutMinutes $selfHealWatchTimeout ` -OutputPath $reportPath - name: Upload rollback drill report @@ -85,16 +131,22 @@ jobs: } $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop - $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) - $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $finalReasonCodes = @($report.final_report.reason_codes | ForEach-Object { [string]$_ }) + $finalReasonCodeText = if ($finalReasonCodes.Count -gt 0) { [string]::Join(',', $finalReasonCodes) } else { 'unknown' } + $attemptCount = @($report.remediation_attempts).Count + $finalCandidateCount = if ($null -ne $report.final_report) { [string]$report.final_report.candidate_count } else { 'unknown' } $body = @" Release rollback drill failed. 
- Run: $env:RUN_URL - Channel: $($report.channel) - - Reason codes: $reasonCodeText + - Reason code: $($report.reason_code) + - Final reason codes: $finalReasonCodeText - Message: $($report.message) - - Candidate count: $($report.candidate_count) + - Initial candidate count: $($report.initial_report.candidate_count) + - Final candidate count: $finalCandidateCount + - Auto remediate: $($report.auto_remediate) + - Remediation attempts: $attemptCount "@ & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` @@ -120,12 +172,18 @@ jobs: } $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptCount = @($report.remediation_attempts).Count + $finalCandidateCount = if ($null -ne $report.final_report) { [string]$report.final_report.candidate_count } else { 'unknown' } $body = @" Release rollback drill recovered. - Run: $env:RUN_URL - Channel: $($report.channel) + - Reason code: $($report.reason_code) - Message: $($report.message) + - Final candidate count: $finalCandidateCount + - Auto remediated: $($report.reason_code -eq 'remediated') + - Remediation attempts: $attemptCount "@ & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` diff --git a/AGENTS.md b/AGENTS.md index a4b2851..31e164c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -218,8 +218,15 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - stable: `80-99` - Promotion must gate on source release integrity (required assets + source commit equals branch head). - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. -- `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Test-OpsSloGate.ps1`. -- SLO gate reason codes must remain explicit: +- `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. 
+- SLO self-healing reason codes must remain explicit: + - `already_healthy` + - `remediated` + - `auto_remediation_disabled` + - `remediation_execution_failed` + - `remediation_verify_failed` + - `slo_self_heal_runtime_error` +- Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` reason codes must remain explicit: - `workflow_missing_runs` - `workflow_failure_detected` - `workflow_success_rate_below_threshold` @@ -234,9 +241,18 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_self_healing_missing` - `policy_drift_runtime_error` -- `.github/workflows/release-rollback-drill.yml` must run `scripts/Invoke-ReleaseRollbackDrill.ps1`. -- Rollback drill reason codes must remain explicit: +- `.github/workflows/release-rollback-drill.yml` must run `scripts/Invoke-RollbackDrillSelfHealing.ps1`. +- Rollback self-healing reason codes must remain explicit: + - `already_ready` + - `remediated` + - `auto_remediation_disabled` + - `no_automatable_action` + - `remediation_execution_failed` + - `remediation_verify_failed` + - `rollback_self_heal_runtime_error` +- Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` reason codes must remain explicit: - `rollback_candidate_missing` - `rollback_assets_missing` - `rollback_drill_runtime_error` diff --git a/README.md b/README.md index b18d0a0..9de48d1 100644 --- a/README.md +++ b/README.md @@ -388,16 +388,22 @@ Control-plane behavior: `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. -`ops-slo-gate.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Test-OpsSloGate.ps1` to enforce: +`ops-slo-gate.yml` is scheduled daily and supports manual dispatch. 
It runs `scripts/Invoke-OpsSloSelfHealing.ps1` to enforce: - 7-day lookback by default - 100% success-rate target for `ops-monitoring`, `ops-autoremediate`, and `release-control-plane` - max sync-guard success age of 12 hours +- bounded self-healing by dispatching `ops-autoremediate.yml` and re-verifying SLO status - deterministic reason codes on failure: - - `workflow_missing_runs` - - `workflow_failure_detected` - - `workflow_success_rate_below_threshold` - - `sync_guard_missing` - - `sync_guard_stale` + - `auto_remediation_disabled` + - `remediation_verify_failed` + - `slo_self_heal_runtime_error` + +Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic `reason_codes`: +- `workflow_missing_runs` +- `workflow_failure_detected` +- `workflow_success_rate_below_threshold` +- `sync_guard_missing` +- `sync_guard_stale` `ops-policy-drift-check.yml` is scheduled hourly and supports manual dispatch. It runs `scripts/Test-ReleaseControlPlanePolicyDrift.ps1` and fails on: - root/payload release-client policy drift @@ -407,13 +413,21 @@ Control-plane behavior: - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_self_healing_missing` -`release-rollback-drill.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-ReleaseRollbackDrill.ps1` to validate deterministic rollback readiness: +`release-rollback-drill.yml` is scheduled daily and supports manual dispatch. 
It runs `scripts/Invoke-RollbackDrillSelfHealing.ps1` to validate deterministic rollback readiness: - channel-scoped latest/previous release candidates - required release assets for rollback safety (`installer`, `.sha256`, `reproducibility-report.json`, SPDX/SLSA, `release-manifest.json`) +- bounded self-healing for `rollback_candidate_missing` by dispatching one canary release and re-verifying rollback readiness - deterministic reason codes on failure: - - `rollback_candidate_missing` - - `rollback_assets_missing` + - `auto_remediation_disabled` + - `no_automatable_action` + - `remediation_verify_failed` + - `rollback_self_heal_runtime_error` + +Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` still emits deterministic `reason_codes`: +- `rollback_candidate_missing` +- `rollback_assets_missing` ## Local Docker package for control-plane exercise diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 57bb0f5..cca40a5 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -130,6 +130,13 @@ Run strict SLO gate with default 7-day window: gh workflow run ops-slo-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ``` +The workflow runs bounded self-healing by default. Disable it for diagnostics: + +```powershell +gh workflow run ops-slo-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f auto_self_heal=false +``` + Run with explicit thresholds: ```powershell @@ -155,6 +162,15 @@ gh workflow run release-rollback-drill.yml -R LabVIEW-Community-CI-CD/labview-cd -f required_history_count=2 ``` +The workflow performs bounded self-healing by default for `rollback_candidate_missing` by dispatching one canary release and then re-checking rollback readiness. 
Disable for diagnostics: + +```powershell +gh workflow run release-rollback-drill.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f channel=canary ` + -f required_history_count=2 ` + -f auto_self_heal=false +``` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` diff --git a/scripts/Invoke-OpsSloSelfHealing.ps1 b/scripts/Invoke-OpsSloSelfHealing.ps1 new file mode 100644 index 0000000..6772405 --- /dev/null +++ b/scripts/Invoke-OpsSloSelfHealing.ps1 @@ -0,0 +1,286 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$SurfaceRepository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidateRange(1, 90)] + [int]$LookbackDays = 7, + + [Parameter()] + [ValidateRange(0, 100)] + [double]$MinSuccessRatePct = 100, + + [Parameter()] + [ValidateRange(1, 168)] + [int]$SyncGuardMaxAgeHours = 12, + + [Parameter()] + [bool]$AutoRemediate = $true, + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$RemediationWorkflow = 'ops-autoremediate.yml', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$RemediationBranch = 'main', + + [Parameter()] + [ValidateRange(1, 5)] + [int]$MaxAttempts = 1, + + [Parameter()] + [ValidateRange(5, 240)] + [int]$WatchTimeoutMinutes = 45, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$sloGateScript = Join-Path $PSScriptRoot 'Test-OpsSloGate.ps1' +$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' +$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' + +foreach ($requiredScript in @($sloGateScript, $dispatchWorkflowScript, $watchWorkflowScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +function Invoke-SloGateAssessment { + param( + [Parameter(Mandatory = $true)][string]$ScriptPath, + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter(Mandatory = $true)][int]$WindowDays, + [Parameter(Mandatory = $true)][double]$SuccessThreshold, + [Parameter(Mandatory = $true)][int]$SyncGuardHours, + [Parameter(Mandatory = $true)][string]$ReportPath + ) + + $runtimeError = '' + $exitCode = 1 + try { + & pwsh -NoProfile -File $ScriptPath ` + -SurfaceRepository $Repository ` + -LookbackDays $WindowDays ` + -MinSuccessRatePct $SuccessThreshold ` + -SyncGuardMaxAgeHours $SyncGuardHours ` + -OutputPath $ReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $runtimeError = [string]$_.Exception.Message + $exitCode = 1 + } + + $loadedReport = $null + if (Test-Path -LiteralPath $ReportPath -PathType Leaf) { + $loadedReport = Get-Content -LiteralPath $ReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if ($null -eq $loadedReport) { + $message = if ([string]::IsNullOrWhiteSpace($runtimeError)) { + "slo_gate_report_missing: $ReportPath" + } else { + $runtimeError + } + $loadedReport = [pscustomobject]@{ + status = 'fail' + reason_codes = @('slo_gate_runtime_error') + message = $message + } + } elseif (-not [string]::IsNullOrWhiteSpace($runtimeError)) { + $loadedReport.status = 'fail' + $loadedReport.reason_codes = @('slo_gate_runtime_error') + $loadedReport.message = $runtimeError + } + + return 
[pscustomobject]@{ + exit_code = $exitCode + report = $loadedReport + } +} + +$scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("ops-slo-self-heal-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + surface_repository = $SurfaceRepository + lookback_days = $LookbackDays + min_success_rate_pct = $MinSuccessRatePct + sync_guard_max_age_hours = $SyncGuardMaxAgeHours + auto_remediate = [bool]$AutoRemediate + remediation_workflow = $RemediationWorkflow + remediation_branch = $RemediationBranch + max_attempts = $MaxAttempts + watch_timeout_minutes = $WatchTimeoutMinutes + status = 'fail' + reason_code = '' + message = '' + initial_report = $null + remediation_attempts = @() + final_report = $null +} + +try { + $initialPath = Join-Path $scratchRoot 'initial-slo-gate.json' + $initialAssessment = Invoke-SloGateAssessment ` + -ScriptPath $sloGateScript ` + -Repository $SurfaceRepository ` + -WindowDays $LookbackDays ` + -SuccessThreshold $MinSuccessRatePct ` + -SyncGuardHours $SyncGuardMaxAgeHours ` + -ReportPath $initialPath + $initialReport = $initialAssessment.report + $report.initial_report = $initialReport + $report.final_report = $initialReport + + if ([string]$initialReport.status -eq 'pass') { + $report.status = 'pass' + $report.reason_code = 'already_healthy' + $report.message = 'SLO gate is already passing. No remediation was required.' + } elseif (-not $AutoRemediate) { + $report.status = 'fail' + $report.reason_code = 'auto_remediation_disabled' + $report.message = 'SLO gate failed and auto-remediation is disabled.' 
+ } else { + $attemptRecords = [System.Collections.Generic.List[object]]::new() + $recovered = $false + $hadExecutionFailure = $false + $lastExecutionError = '' + $finalReport = $initialReport + $normalizedMaxAttempts = [Math]::Max(1, [Math]::Min($MaxAttempts, 5)) + + for ($attempt = 1; $attempt -le $normalizedMaxAttempts; $attempt++) { + $attemptRecord = [ordered]@{ + attempt = $attempt + status = 'pending' + dispatch = $null + watch = $null + verify = $null + error = '' + } + + $executionOk = $true + try { + $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-dispatch.json" -f $attempt) + & pwsh -NoProfile -File $dispatchWorkflowScript ` + -Repository $SurfaceRepository ` + -WorkflowFile $RemediationWorkflow ` + -Branch $RemediationBranch ` + -Input @("sync_guard_max_age_hours=$SyncGuardMaxAgeHours") ` + -OutputPath $dispatchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "slo_remediation_dispatch_failed: exit_code=$LASTEXITCODE" + } + $dispatchReport = Get-Content -LiteralPath $dispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptRecord.dispatch = [ordered]@{ + run_id = [string]$dispatchReport.run_id + head_sha = [string]$dispatchReport.head_sha + url = [string]$dispatchReport.url + } + + $watchPath = Join-Path $scratchRoot ("attempt-{0}-watch.json" -f $attempt) + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $SurfaceRepository ` + -RunId ([string]$dispatchReport.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -OutputPath $watchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "slo_remediation_watch_failed: run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" + } + $watchReport = Get-Content -LiteralPath $watchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptRecord.watch = [ordered]@{ + run_id = [string]$watchReport.run_id + conclusion = [string]$watchReport.conclusion + url = [string]$watchReport.url + classified_reason = [string]$watchReport.classified_reason + } + } catch { + $executionOk = $false 
+ $hadExecutionFailure = $true + $lastExecutionError = [string]$_.Exception.Message + $attemptRecord.error = $lastExecutionError + } + + $verifyPath = Join-Path $scratchRoot ("attempt-{0}-verify.json" -f $attempt) + $verifyAssessment = Invoke-SloGateAssessment ` + -ScriptPath $sloGateScript ` + -Repository $SurfaceRepository ` + -WindowDays $LookbackDays ` + -SuccessThreshold $MinSuccessRatePct ` + -SyncGuardHours $SyncGuardMaxAgeHours ` + -ReportPath $verifyPath + + $verifyReport = $verifyAssessment.report + $finalReport = $verifyReport + $attemptRecord.verify = [ordered]@{ + status = [string]$verifyReport.status + reason_codes = @($verifyReport.reason_codes | ForEach-Object { [string]$_ }) + message = [string]$verifyReport.message + } + + if ($executionOk -and [string]$verifyReport.status -eq 'pass') { + $attemptRecord.status = 'recovered' + [void]$attemptRecords.Add($attemptRecord) + $recovered = $true + break + } + + if (-not $executionOk) { + $attemptRecord.status = 'remediation_execution_failed' + } else { + $attemptRecord.status = 'verify_failed' + } + [void]$attemptRecords.Add($attemptRecord) + } + + $report.remediation_attempts = @($attemptRecords) + $report.final_report = $finalReport + + if ($recovered) { + $report.status = 'pass' + $report.reason_code = 'remediated' + $report.message = 'SLO gate auto-remediation completed and verification passed.' + } else { + $report.status = 'fail' + if ($hadExecutionFailure -and $null -eq $finalReport) { + $report.reason_code = 'remediation_execution_failed' + $report.message = $lastExecutionError + } else { + $report.reason_code = 'remediation_verify_failed' + $finalReasons = @() + if ($null -ne $finalReport) { + $finalReasons = @($finalReport.reason_codes | ForEach-Object { [string]$_ }) + } + $finalReasonText = if ($finalReasons.Count -gt 0) { [string]::Join(',', $finalReasons) } else { 'unknown' } + $report.message = "SLO gate remains failing after bounded remediation. 
final_reason_codes=$finalReasonText" + } + } + } +} catch { + $report.status = 'fail' + $report.reason_code = 'slo_self_heal_runtime_error' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Invoke-RollbackDrillSelfHealing.ps1 b/scripts/Invoke-RollbackDrillSelfHealing.ps1 new file mode 100644 index 0000000..3e8011f --- /dev/null +++ b/scripts/Invoke-RollbackDrillSelfHealing.ps1 @@ -0,0 +1,390 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidateSet('stable', 'prerelease', 'canary')] + [string]$Channel = 'canary', + + [Parameter()] + [ValidateRange(2, 100)] + [int]$RequiredHistoryCount = 2, + + [Parameter()] + [ValidateRange(10, 200)] + [int]$ReleaseLimit = 100, + + [Parameter()] + [bool]$AutoRemediate = $true, + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$ReleaseWorkflowFile = 'release-workspace-installer.yml', + + [Parameter()] + [ValidateRange(1, 5)] + [int]$MaxAttempts = 1, + + [Parameter()] + [ValidateRange(5, 240)] + [int]$WatchTimeoutMinutes = 120, + + [Parameter()] + [ValidateRange(1, 49)] + [int]$CanarySequenceMin = 1, + + [Parameter()] + [ValidateRange(1, 99)] + [int]$CanarySequenceMax = 49, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$rollbackDrillScript = Join-Path $PSScriptRoot 'Invoke-ReleaseRollbackDrill.ps1' +$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' +$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' + +foreach ($requiredScript in @($rollbackDrillScript, $dispatchWorkflowScript, $watchWorkflowScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +function Parse-ReleaseTagRecord { + param([Parameter(Mandatory = $true)][string]$TagName) + + $match = [regex]::Match($TagName, '^v0\.(?<date>\d{8})\.(?<sequence>\d+)$') + if (-not $match.Success) { + return $null + } + + $sequence = 0 + if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { + return $null + } + + return [pscustomobject]@{ + tag_name = $TagName + date = [string]$match.Groups['date'].Value + sequence = $sequence + } +} + +function Get-NextCanaryTag { + param( + [Parameter(Mandatory = $true)][string]$TargetRepository, + [Parameter(Mandatory = $true)][int]$MaxReleases, + [Parameter(Mandatory = $true)][int]$RangeMin, + [Parameter(Mandatory = $true)][int]$RangeMax + ) + + if ($RangeMin -gt $RangeMax) { + throw "canary_range_invalid: min=$RangeMin max=$RangeMax" + } + + $dateKey = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') + $releases = @(Get-GhReleasesPortable -Repository $TargetRepository -Limit $MaxReleases -ExcludeDrafts) + $matched = @() + foreach ($release in $releases) { + if (-not [bool]$release.isPrerelease) { + continue + } + + $parsed = Parse-ReleaseTagRecord -TagName ([string]$release.tagName) + if ($null -eq $parsed) { + continue + } + if ([string]$parsed.date -ne $dateKey) { + continue + } + if ([int]$parsed.sequence -lt $RangeMin -or [int]$parsed.sequence -gt $RangeMax) { + continue + } + + $matched += [int]$parsed.sequence + } + + $nextSequence = if (@($matched).Count -eq 0) { + $RangeMin + }
else { + ((@($matched) | Measure-Object -Maximum).Maximum + 1) + } + + if ($nextSequence -gt $RangeMax) { + throw "canary_tag_range_exhausted: date=$dateKey next_sequence=$nextSequence range_max=$RangeMax" + } + + return [pscustomobject]@{ + date_key = $dateKey + sequence = $nextSequence + tag = "v0.$dateKey.$nextSequence" + } +} + +function Invoke-RollbackAssessment { + param( + [Parameter(Mandatory = $true)][string]$ScriptPath, + [Parameter(Mandatory = $true)][string]$TargetRepository, + [Parameter(Mandatory = $true)][string]$TargetChannel, + [Parameter(Mandatory = $true)][int]$HistoryCount, + [Parameter(Mandatory = $true)][int]$Limit, + [Parameter(Mandatory = $true)][string]$ReportPath + ) + + $runtimeError = '' + $exitCode = 1 + try { + & pwsh -NoProfile -File $ScriptPath ` + -Repository $TargetRepository ` + -Channel $TargetChannel ` + -RequiredHistoryCount $HistoryCount ` + -ReleaseLimit $Limit ` + -OutputPath $ReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $runtimeError = [string]$_.Exception.Message + $exitCode = 1 + } + + $loadedReport = $null + if (Test-Path -LiteralPath $ReportPath -PathType Leaf) { + $loadedReport = Get-Content -LiteralPath $ReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if ($null -eq $loadedReport) { + $message = if ([string]::IsNullOrWhiteSpace($runtimeError)) { + "rollback_drill_report_missing: $ReportPath" + } else { + $runtimeError + } + $loadedReport = [pscustomobject]@{ + status = 'fail' + reason_codes = @('rollback_drill_runtime_error') + message = $message + candidate_count = 0 + } + } elseif (-not [string]::IsNullOrWhiteSpace($runtimeError)) { + $loadedReport.status = 'fail' + $loadedReport.reason_codes = @('rollback_drill_runtime_error') + $loadedReport.message = $runtimeError + } + + return [pscustomobject]@{ + exit_code = $exitCode + report = $loadedReport + } +} + +$scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) 
("rollback-self-heal-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + branch = $Branch + channel = $Channel + required_history_count = $RequiredHistoryCount + release_limit = $ReleaseLimit + auto_remediate = [bool]$AutoRemediate + release_workflow = $ReleaseWorkflowFile + max_attempts = $MaxAttempts + watch_timeout_minutes = $WatchTimeoutMinutes + canary_sequence_min = $CanarySequenceMin + canary_sequence_max = $CanarySequenceMax + status = 'fail' + reason_code = '' + message = '' + initial_report = $null + remediation_attempts = @() + final_report = $null +} + +try { + $initialPath = Join-Path $scratchRoot 'initial-rollback-drill.json' + $initialAssessment = Invoke-RollbackAssessment ` + -ScriptPath $rollbackDrillScript ` + -TargetRepository $Repository ` + -TargetChannel $Channel ` + -HistoryCount $RequiredHistoryCount ` + -Limit $ReleaseLimit ` + -ReportPath $initialPath + $initialReport = $initialAssessment.report + $report.initial_report = $initialReport + $report.final_report = $initialReport + + if ([string]$initialReport.status -eq 'pass') { + $report.status = 'pass' + $report.reason_code = 'already_ready' + $report.message = 'Rollback drill is already passing. No remediation was required.' + } elseif (-not $AutoRemediate) { + $report.status = 'fail' + $report.reason_code = 'auto_remediation_disabled' + $report.message = 'Rollback drill failed and auto-remediation is disabled.' + } else { + $initialReasons = @($initialReport.reason_codes | ForEach-Object { [string]$_ }) + $canAutomate = (($initialReasons -contains 'rollback_candidate_missing') -and ([string]$Channel -eq 'canary')) + if (-not $canAutomate) { + $report.status = 'fail' + $report.reason_code = 'no_automatable_action' + $report.message = "Rollback drill failed with no automatable remediation path. 
reason_codes=$([string]::Join(',', $initialReasons))" + } else { + $attemptRecords = [System.Collections.Generic.List[object]]::new() + $recovered = $false + $hadExecutionFailure = $false + $lastExecutionError = '' + $finalReport = $initialReport + $normalizedMaxAttempts = [Math]::Max(1, [Math]::Min($MaxAttempts, 5)) + + for ($attempt = 1; $attempt -le $normalizedMaxAttempts; $attempt++) { + $attemptRecord = [ordered]@{ + attempt = $attempt + status = 'pending' + target_tag = '' + dispatch = $null + watch = $null + verify = $null + error = '' + } + + $executionOk = $true + try { + $targetTagRecord = Get-NextCanaryTag ` + -TargetRepository $Repository ` + -MaxReleases $ReleaseLimit ` + -RangeMin $CanarySequenceMin ` + -RangeMax $CanarySequenceMax + $attemptRecord.target_tag = [string]$targetTagRecord.tag + + $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-dispatch.json" -f $attempt) + & pwsh -NoProfile -File $dispatchWorkflowScript ` + -Repository $Repository ` + -WorkflowFile $ReleaseWorkflowFile ` + -Branch $Branch ` + -Input @( + "release_tag=$([string]$targetTagRecord.tag)", + 'allow_existing_tag=false', + 'prerelease=true', + 'release_channel=canary' + ) ` + -OutputPath $dispatchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "rollback_auto_release_dispatch_failed: exit_code=$LASTEXITCODE" + } + $dispatchReport = Get-Content -LiteralPath $dispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptRecord.dispatch = [ordered]@{ + run_id = [string]$dispatchReport.run_id + head_sha = [string]$dispatchReport.head_sha + url = [string]$dispatchReport.url + } + + $watchPath = Join-Path $scratchRoot ("attempt-{0}-watch.json" -f $attempt) + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $Repository ` + -RunId ([string]$dispatchReport.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -OutputPath $watchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "rollback_auto_release_watch_failed: run_id=$([string]$dispatchReport.run_id) 
exit_code=$LASTEXITCODE" + } + $watchReport = Get-Content -LiteralPath $watchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $attemptRecord.watch = [ordered]@{ + run_id = [string]$watchReport.run_id + conclusion = [string]$watchReport.conclusion + url = [string]$watchReport.url + classified_reason = [string]$watchReport.classified_reason + } + } catch { + $executionOk = $false + $hadExecutionFailure = $true + $lastExecutionError = [string]$_.Exception.Message + $attemptRecord.error = $lastExecutionError + } + + $verifyPath = Join-Path $scratchRoot ("attempt-{0}-verify.json" -f $attempt) + $verifyAssessment = Invoke-RollbackAssessment ` + -ScriptPath $rollbackDrillScript ` + -TargetRepository $Repository ` + -TargetChannel $Channel ` + -HistoryCount $RequiredHistoryCount ` + -Limit $ReleaseLimit ` + -ReportPath $verifyPath + + $verifyReport = $verifyAssessment.report + $finalReport = $verifyReport + $attemptRecord.verify = [ordered]@{ + status = [string]$verifyReport.status + reason_codes = @($verifyReport.reason_codes | ForEach-Object { [string]$_ }) + message = [string]$verifyReport.message + candidate_count = [int]$verifyReport.candidate_count + } + + if ($executionOk -and [string]$verifyReport.status -eq 'pass') { + $attemptRecord.status = 'recovered' + [void]$attemptRecords.Add($attemptRecord) + $recovered = $true + break + } + + if (-not $executionOk) { + $attemptRecord.status = 'remediation_execution_failed' + } else { + $attemptRecord.status = 'verify_failed' + } + [void]$attemptRecords.Add($attemptRecord) + } + + $report.remediation_attempts = @($attemptRecords) + $report.final_report = $finalReport + + if ($recovered) { + $report.status = 'pass' + $report.reason_code = 'remediated' + $report.message = 'Rollback drill auto-remediation completed and verification passed.' 
+ } else { + $report.status = 'fail' + if ($hadExecutionFailure -and $null -eq $finalReport) { + $report.reason_code = 'remediation_execution_failed' + $report.message = $lastExecutionError + } else { + $report.reason_code = 'remediation_verify_failed' + $finalReasons = @() + if ($null -ne $finalReport) { + $finalReasons = @($finalReport.reason_codes | ForEach-Object { [string]$_ }) + } + $finalReasonText = if ($finalReasons.Count -gt 0) { [string]::Join(',', $finalReasons) } else { 'unknown' } + $report.message = "Rollback drill remains failing after bounded remediation. final_reason_codes=$finalReasonText" + } + } + } + } +} catch { + $report.status = 'fail' + $report.reason_code = 'rollback_self_heal_runtime_error' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 38bbd08..63575aa 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -177,6 +177,17 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Scope 'manifest' -Name 
'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes -eq 45) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow -eq 'release-workspace-installer.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow) + 
Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_branch' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch -eq 'main') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_canary_min' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_canary_max' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max -eq 49) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 
2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_release_limit' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index efcc3b9..61584ee 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -101,6 +101,17 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_incident_auto_close_on_recovery' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Name 'ops_policy_incident_reopen_on_regression' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) Add-Check -Name 'ops_policy_incident_title_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Control Plane Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Name 'ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) + Add-Check -Name 'ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) + Add-Check -Name 'ops_policy_self_healing_slo_workflow' -Passed 
([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) + Add-Check -Name 'ops_policy_self_healing_slo_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes -eq 45) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes) + Add-Check -Name 'ops_policy_self_healing_slo_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) + Add-Check -Name 'ops_policy_self_healing_rollback_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow -eq 'release-workspace-installer.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow) + Add-Check -Name 'ops_policy_self_healing_rollback_branch' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch -eq 'main') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch) + Add-Check -Name 'ops_policy_self_healing_rollback_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes) + Add-Check -Name 'ops_policy_self_healing_rollback_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) + Add-Check -Name 'ops_policy_self_healing_rollback_canary_min' -Passed 
([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min) + Add-Check -Name 'ops_policy_self_healing_rollback_canary_max' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max -eq 49) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max) Add-Check -Name 'ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) Add-Check -Name 'ops_policy_rollback_required_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) Add-Check -Name 'ops_policy_rollback_release_limit' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index 30b79eb..0a226bd 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -91,6 +91,15 @@ try { }) | Out-Null if (-not $opsPolicyPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_policy_missing' + } else { + $selfHealingPresent = ($null -ne $releaseClient.ops_control_plane_policy.self_healing) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_self_healing_present' + passed = $selfHealingPresent + }) | Out-Null + if (-not $selfHealingPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_self_healing_missing' + } } } } diff --git 
a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index b21fff1..1c9aadf 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -39,5 +39,6 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'release_client_drift' $script:runtimeContent | Should -Match 'runtime_images_missing' $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' } } diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 b/tests/OpsSloGateWorkflowContract.Tests.ps1 index 97324bd..7375a59 100644 --- a/tests/OpsSloGateWorkflowContract.Tests.ps1 +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -8,8 +8,9 @@ Describe 'Ops SLO gate workflow contract' { $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' $script:runtimePath = Join-Path $script:repoRoot 'scripts/Test-OpsSloGate.ps1' + $script:selfHealingPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsSloSelfHealing.ps1' - foreach ($path in @($script:workflowPath, $script:runtimePath)) { + foreach ($path in @($script:workflowPath, $script:runtimePath, $script:selfHealingPath)) { if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { throw "Ops SLO gate contract file missing: $path" } @@ -17,6 +18,7 @@ Describe 'Ops SLO gate workflow contract' { $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + $script:selfHealingContent = Get-Content -LiteralPath $script:selfHealingPath -Raw } It 'is scheduled and dispatchable with deterministic SLO inputs' { @@ -25,15 +27,19 @@ Describe 'Ops SLO gate workflow contract' { $script:workflowContent | Should -Match 'lookback_days' $script:workflowContent | 
Should -Match 'min_success_rate_pct' $script:workflowContent | Should -Match 'sync_guard_max_age_hours' + $script:workflowContent | Should -Match 'auto_self_heal' + $script:workflowContent | Should -Match 'self_heal_max_attempts' + $script:workflowContent | Should -Match 'self_heal_watch_timeout_minutes' } - It 'runs SLO gate runtime, uploads report, and manages incident lifecycle' { - $script:workflowContent | Should -Match 'Test-OpsSloGate\.ps1' + It 'runs self-healing SLO runtime, uploads report, and manages incident lifecycle' { + $script:workflowContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:workflowContent | Should -Match 'ops-slo-gate-report\.json' $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'Ops SLO Gate Alert' $script:workflowContent | Should -Match '-Mode Fail' $script:workflowContent | Should -Match '-Mode Recover' + $script:workflowContent | Should -Match 'actions:\s*write' } It 'evaluates workflow and sync-guard SLO conditions with deterministic reason codes' { @@ -48,4 +54,16 @@ Describe 'Ops SLO gate workflow contract' { $script:runtimeContent | Should -Match 'sync_guard_stale' $script:runtimeContent | Should -Match 'sync_guard_missing' } + + It 'runs bounded SLO self-healing playbook with deterministic outcomes' { + $script:selfHealingContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' + $script:selfHealingContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:selfHealingContent | Should -Match 'ops-autoremediate\.yml' + $script:selfHealingContent | Should -Match 'sync_guard_max_age_hours' + $script:selfHealingContent | Should -Match 'already_healthy' + $script:selfHealingContent | Should -Match 'remediated' + $script:selfHealingContent | Should -Match 'auto_remediation_disabled' + $script:selfHealingContent | Should -Match 'remediation_verify_failed' + $script:selfHealingContent | Should -Match 'slo_self_heal_runtime_error' + } } diff --git 
a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 7822868..6ff6fe5 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -63,6 +63,17 @@ Describe 'Release client policy contract' { @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops SLO Gate Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' + $releaseClient.ops_control_plane_policy.self_healing.enabled | Should -BeTrue + $releaseClient.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 + $releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' + $releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes | Should -Be 45 + $releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation | Should -BeTrue + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow | Should -Be 'release-workspace-installer.yml' + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch | Should -Be 'main' + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes | Should -Be 120 + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation | Should -BeTrue + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min | Should -Be 1 + $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max | Should -Be 49 $releaseClient.ops_control_plane_policy.rollback_drill.channel | Should -Be 'canary' $releaseClient.ops_control_plane_policy.rollback_drill.required_history_count | Should -Be 2 
$releaseClient.ops_control_plane_policy.rollback_drill.release_limit | Should -Be 100 @@ -82,6 +93,8 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' + $script:policyScriptContent | Should -Match 'ops_policy_self_healing_enabled' + $script:policyScriptContent | Should -Match 'ops_policy_self_healing_rollback_workflow' $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' } diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 index 3eda429..fb18339 100644 --- a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -8,8 +8,9 @@ Describe 'Release rollback drill workflow contract' { $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' + $script:selfHealingPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' - foreach ($path in @($script:workflowPath, $script:runtimePath)) { + foreach ($path in @($script:workflowPath, $script:runtimePath, $script:selfHealingPath)) { if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { throw "Release rollback drill contract file missing: $path" } @@ -17,22 +18,27 @@ Describe 'Release rollback drill workflow contract' { $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + $script:selfHealingContent = Get-Content -LiteralPath $script:selfHealingPath -Raw } - It 'is scheduled and dispatchable with channel and 
history controls' { + It 'is scheduled and dispatchable with channel, history, and self-healing controls' { $script:workflowContent | Should -Match 'schedule:' $script:workflowContent | Should -Match 'workflow_dispatch:' $script:workflowContent | Should -Match 'channel:' $script:workflowContent | Should -Match 'required_history_count' + $script:workflowContent | Should -Match 'auto_self_heal' + $script:workflowContent | Should -Match 'self_heal_max_attempts' + $script:workflowContent | Should -Match 'self_heal_watch_timeout_minutes' } - It 'runs rollback drill runtime, uploads report, and manages incident lifecycle' { - $script:workflowContent | Should -Match 'Invoke-ReleaseRollbackDrill\.ps1' + It 'runs rollback self-healing runtime, uploads report, and manages incident lifecycle' { + $script:workflowContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:workflowContent | Should -Match 'release-rollback-drill-report\.json' $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'Release Rollback Drill Alert' $script:workflowContent | Should -Match '-Mode Fail' $script:workflowContent | Should -Match '-Mode Recover' + $script:workflowContent | Should -Match 'actions:\s*write' } It 'validates channel-specific release history and required rollback assets' { @@ -46,4 +52,18 @@ Describe 'Release rollback drill workflow contract' { $script:runtimeContent | Should -Match 'workspace-installer\.slsa\.json' $script:runtimeContent | Should -Match 'reproducibility-report\.json' } + + It 'runs bounded rollback self-healing by triggering canary release workflow and re-verifying' { + $script:selfHealingContent | Should -Match 'Invoke-ReleaseRollbackDrill\.ps1' + $script:selfHealingContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' + $script:selfHealingContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:selfHealingContent | Should -Match 'release-workspace-installer\.yml' + 
$script:selfHealingContent | Should -Match 'release_channel=canary' + $script:selfHealingContent | Should -Match 'allow_existing_tag=false' + $script:selfHealingContent | Should -Match 'rollback_candidate_missing' + $script:selfHealingContent | Should -Match 'already_ready' + $script:selfHealingContent | Should -Match 'remediated' + $script:selfHealingContent | Should -Match 'no_automatable_action' + $script:selfHealingContent | Should -Match 'rollback_self_heal_runtime_error' + } } diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index 809505c..df0f133 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -30,6 +30,7 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'ops-slo-gate\.yml' $script:runbookContent | Should -Match 'ops-policy-drift-check\.yml' $script:runbookContent | Should -Match 'release-rollback-drill\.yml' + $script:runbookContent | Should -Match 'auto_self_heal=false' $script:runbookContent | Should -Match '20260226' } @@ -39,6 +40,8 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' + $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' + $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'release-ops-incident-response\.md' $script:agentsContent | Should -Match 'Ops Monitoring Policy' @@ -48,5 +51,7 @@ Describe 'Scope A ops runbook contract' { $script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' + $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' + $script:agentsContent | Should -Match 
'Invoke-RollbackDrillSelfHealing\.ps1' } } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 98a8f9b..8d18717 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -32,8 +32,10 @@ Describe 'Workspace surface contract' { $script:testReleaseClientContractsScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseClientContracts.ps1' $script:opsIncidentLifecycleScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsIncidentLifecycle.ps1' $script:opsSloGateScriptPath = Join-Path $script:repoRoot 'scripts/Test-OpsSloGate.ps1' + $script:opsSloSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsSloSelfHealing.ps1' $script:opsPolicyDriftScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseControlPlanePolicyDrift.ps1' $script:rollbackDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' + $script:rollbackSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' $script:windowsContainerNsisSelfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' $script:windowsContainerNsisDockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' @@ -97,8 +99,10 @@ Describe 'Workspace surface contract' { $script:testReleaseClientContractsScriptPath, $script:opsIncidentLifecycleScriptPath, $script:opsSloGateScriptPath, + $script:opsSloSelfHealingScriptPath, $script:opsPolicyDriftScriptPath, $script:rollbackDrillScriptPath, + $script:rollbackSelfHealingScriptPath, $script:dockerLinuxIterationScriptPath, $script:windowsContainerNsisSelfTestScriptPath, $script:windowsContainerNsisDockerfilePath, @@ -278,6 +282,17 @@ Describe 'Workspace surface contract' { 
(@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.enabled | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes | Should -Be 45 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.release_workflow | Should -Be 'release-workspace-installer.yml' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.release_branch | Should -Be 'main' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes | Should -Be 120 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min | Should -Be 1 + 
$script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max | Should -Be 49 $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.channel | Should -Be 'canary' $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.required_history_count | Should -Be 2 $script:manifest.installer_contract.release_client.ops_control_plane_policy.rollback_drill.release_limit | Should -Be 100 @@ -366,10 +381,13 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' + $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' + $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:agentsContent | Should -Match 'workflow_failure_detected' $script:agentsContent | Should -Match 'release_client_drift' $script:agentsContent | Should -Match 'rollback_candidate_missing' + $script:agentsContent | Should -Match 'remediation_verify_failed' $script:agentsContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' $script:agentsContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' $script:agentsContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' @@ -400,10 +418,13 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' + $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' + $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 
'Invoke-OpsIncidentLifecycle\.ps1' $script:readmeContent | Should -Match 'workflow_failure_detected' $script:readmeContent | Should -Match 'release_client_drift' $script:readmeContent | Should -Match 'rollback_candidate_missing' + $script:readmeContent | Should -Match 'remediation_verify_failed' $script:readmeContent | Should -Match 'ghcr\.io/labview-community-ci-cd/labview-cdev-cli-runtime' $script:readmeContent | Should -Match '8fef6f9192d81a14add28636c1100c109ae5e977' $script:readmeContent | Should -Match '0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' diff --git a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 index 38bbd08..63575aa 100644 --- a/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 +++ b/workspace-governance-payload/workspace-governance/scripts/Test-PolicyContracts.ps1 @@ -177,6 +177,17 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 
'release_client_ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes -eq 45) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow -eq 'release-workspace-installer.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_branch' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch -eq 'main') -Detail 
([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_canary_min' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_min) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_canary_max' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max -eq 49) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.canary_sequence_max) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_channel' -Passed ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel -eq 'canary') -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.channel) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_history_count' -Passed ([int]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count -eq 2) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.required_history_count) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_release_limit' -Passed 
([int]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_drill.release_limit) diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 0cb57e3..45b45a5 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -276,6 +276,23 @@ "Release Rollback Drill Alert" ] }, + "self_healing": { + "enabled": true, + "max_attempts": 1, + "slo_gate": { + "remediation_workflow": "ops-autoremediate.yml", + "watch_timeout_minutes": 45, + "verify_after_remediation": true + }, + "rollback_drill": { + "release_workflow": "release-workspace-installer.yml", + "release_branch": "main", + "watch_timeout_minutes": 120, + "verify_after_remediation": true, + "canary_sequence_min": 1, + "canary_sequence_max": 49 + } + }, "rollback_drill": { "channel": "canary", "required_history_count": 2, diff --git a/workspace-governance.json b/workspace-governance.json index 0cb57e3..45b45a5 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -276,6 +276,23 @@ "Release Rollback Drill Alert" ] }, + "self_healing": { + "enabled": true, + "max_attempts": 1, + "slo_gate": { + "remediation_workflow": "ops-autoremediate.yml", + "watch_timeout_minutes": 45, + "verify_after_remediation": true + }, + "rollback_drill": { + "release_workflow": "release-workspace-installer.yml", + "release_branch": "main", + "watch_timeout_minutes": 120, + "verify_after_remediation": true, + "canary_sequence_min": 1, + "canary_sequence_max": 49 + } + }, "rollback_drill": { "channel": "canary", "required_history_count": 2, From 39708e6a8f563fb1b242ea5adfa3d0baefeee074 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:50:55 -0800 Subject: [PATCH 23/60] Fix workflow dispatch 
input binding for self-healing paths --- scripts/Dispatch-WorkflowAtRemoteHead.ps1 | 7 ++++--- scripts/Invoke-OpsSloSelfHealing.ps1 | 8 +++----- scripts/Invoke-ReleaseControlPlane.ps1 | 18 ++++++++---------- scripts/Invoke-RollbackDrillSelfHealing.ps1 | 18 ++++++++---------- scripts/lib/WorkflowOps.Common.ps1 | 8 ++++++-- ...patchWorkflowAtRemoteHeadContract.Tests.ps1 | 6 ++++++ tests/OpsSloGateWorkflowContract.Tests.ps1 | 2 ++ ...leaseControlPlaneWorkflowContract.Tests.ps1 | 2 ++ ...easeRollbackDrillWorkflowContract.Tests.ps1 | 2 ++ 9 files changed, 41 insertions(+), 30 deletions(-) diff --git a/scripts/Dispatch-WorkflowAtRemoteHead.ps1 b/scripts/Dispatch-WorkflowAtRemoteHead.ps1 index f186ec3..fe45b2e 100644 --- a/scripts/Dispatch-WorkflowAtRemoteHead.ps1 +++ b/scripts/Dispatch-WorkflowAtRemoteHead.ps1 @@ -11,7 +11,8 @@ param( [string]$Branch = 'main', [Parameter()] - [string[]]$Input = @(), + [Alias('Input')] + [string[]]$Inputs = @(), [Parameter()] [switch]$CancelStale, @@ -53,7 +54,7 @@ if ($CancelStale) { $dispatchStartedUtc = (Get-Date).ToUniversalTime() $dispatchArgs = @('workflow', 'run', $WorkflowFile, '-R', $Repository, '--ref', $Branch) -$dispatchArgs += @(Convert-InputPairsToGhArgs -Input $Input) +$dispatchArgs += @(Convert-InputPairsToGhArgs -Inputs $Inputs) Invoke-Gh -Arguments $dispatchArgs Start-Sleep -Seconds $DispatchPauseSeconds @@ -91,7 +92,7 @@ $report = [ordered]@{ status = [string]$selectedRun.status conclusion = [string]$selectedRun.conclusion url = [string]$selectedRun.url - inputs = @($Input) + inputs = @($Inputs) stale_cancel_report = $cancelReport } diff --git a/scripts/Invoke-OpsSloSelfHealing.ps1 b/scripts/Invoke-OpsSloSelfHealing.ps1 index 6772405..fd81426 100644 --- a/scripts/Invoke-OpsSloSelfHealing.ps1 +++ b/scripts/Invoke-OpsSloSelfHealing.ps1 @@ -173,15 +173,13 @@ try { $executionOk = $true try { $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-dispatch.json" -f $attempt) - & pwsh -NoProfile -File $dispatchWorkflowScript ` + 
$dispatchInputs = @("sync_guard_max_age_hours=$SyncGuardMaxAgeHours") + & $dispatchWorkflowScript ` -Repository $SurfaceRepository ` -WorkflowFile $RemediationWorkflow ` -Branch $RemediationBranch ` - -Input @("sync_guard_max_age_hours=$SyncGuardMaxAgeHours") ` + -Inputs $dispatchInputs ` -OutputPath $dispatchPath | Out-Null - if ($LASTEXITCODE -ne 0) { - throw "slo_remediation_dispatch_failed: exit_code=$LASTEXITCODE" - } $dispatchReport = Get-Content -LiteralPath $dispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop $attemptRecord.dispatch = [ordered]@{ run_id = [string]$dispatchReport.run_id diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index e4d725a..ad8bbba 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -270,20 +270,18 @@ function Invoke-ReleaseMode { } $dispatchReportPath = Join-Path $ScratchRoot "$ModeName-dispatch.json" - & pwsh -NoProfile -File $dispatchWorkflowScript ` + $dispatchInputs = @( + "release_tag=$targetTag", + 'allow_existing_tag=false', + "prerelease=$([string]([bool]$modeConfig.prerelease).ToLowerInvariant())", + "release_channel=$([string]$modeConfig.channel)" + ) + & $dispatchWorkflowScript ` -Repository $Repository ` -WorkflowFile $ReleaseWorkflowFile ` -Branch $Branch ` - -Input @( - "release_tag=$targetTag", - 'allow_existing_tag=false', - "prerelease=$([string]([bool]$modeConfig.prerelease).ToLowerInvariant())", - "release_channel=$([string]$modeConfig.channel)" - ) ` + -Inputs $dispatchInputs ` -OutputPath $dispatchReportPath - if ($LASTEXITCODE -ne 0) { - throw "release_dispatch_failed: mode=$ModeName exit_code=$LASTEXITCODE" - } $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop $watchReportPath = Join-Path $ScratchRoot "$ModeName-watch.json" diff --git a/scripts/Invoke-RollbackDrillSelfHealing.ps1 b/scripts/Invoke-RollbackDrillSelfHealing.ps1 index 3e8011f..7d55831 100644 --- 
a/scripts/Invoke-RollbackDrillSelfHealing.ps1 +++ b/scripts/Invoke-RollbackDrillSelfHealing.ps1 @@ -270,20 +270,18 @@ try { $attemptRecord.target_tag = [string]$targetTagRecord.tag $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-dispatch.json" -f $attempt) - & pwsh -NoProfile -File $dispatchWorkflowScript ` + $dispatchInputs = @( + "release_tag=$([string]$targetTagRecord.tag)", + 'allow_existing_tag=false', + 'prerelease=true', + 'release_channel=canary' + ) + & $dispatchWorkflowScript ` -Repository $Repository ` -WorkflowFile $ReleaseWorkflowFile ` -Branch $Branch ` - -Input @( - "release_tag=$([string]$targetTagRecord.tag)", - 'allow_existing_tag=false', - 'prerelease=true', - 'release_channel=canary' - ) ` + -Inputs $dispatchInputs ` -OutputPath $dispatchPath | Out-Null - if ($LASTEXITCODE -ne 0) { - throw "rollback_auto_release_dispatch_failed: exit_code=$LASTEXITCODE" - } $dispatchReport = Get-Content -LiteralPath $dispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop $attemptRecord.dispatch = [ordered]@{ run_id = [string]$dispatchReport.run_id diff --git a/scripts/lib/WorkflowOps.Common.ps1 b/scripts/lib/WorkflowOps.Common.ps1 index dbbc312..85207aa 100644 --- a/scripts/lib/WorkflowOps.Common.ps1 +++ b/scripts/lib/WorkflowOps.Common.ps1 @@ -72,10 +72,14 @@ function Get-UtcNowIso { } function Convert-InputPairsToGhArgs { - param([Parameter()][string[]]$Input = @()) + param( + [Parameter()] + [Alias('Input')] + [string[]]$Inputs = @() + ) $arguments = @() - foreach ($pair in @($Input)) { + foreach ($pair in @($Inputs)) { $text = ([string]$pair).Trim() if ([string]::IsNullOrWhiteSpace($text)) { continue diff --git a/tests/DispatchWorkflowAtRemoteHeadContract.Tests.ps1 b/tests/DispatchWorkflowAtRemoteHeadContract.Tests.ps1 index 6685ab8..671f4a2 100644 --- a/tests/DispatchWorkflowAtRemoteHeadContract.Tests.ps1 +++ b/tests/DispatchWorkflowAtRemoteHeadContract.Tests.ps1 @@ -18,6 +18,12 @@ Describe 'Dispatch workflow at remote head contract' { $script:content 
| Should -Match 'dispatch_head_sha_mismatch' } + It 'uses explicit inputs parameter with backward-compatible alias' { + $script:content | Should -Match "\[Alias\('Input'\)\]" + $script:content | Should -Match '\[string\[\]\]\$Inputs' + $script:content | Should -Match 'Convert-InputPairsToGhArgs -Inputs \$Inputs' + } + It 'supports stale-run cancellation before dispatch' { $script:content | Should -Match 'Cancel-StaleWorkflowRuns\.ps1' $script:content | Should -Match '-TargetHeadSha \$expectedHeadSha' diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 b/tests/OpsSloGateWorkflowContract.Tests.ps1 index 7375a59..25b0319 100644 --- a/tests/OpsSloGateWorkflowContract.Tests.ps1 +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -59,6 +59,8 @@ Describe 'Ops SLO gate workflow contract' { $script:selfHealingContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' $script:selfHealingContent | Should -Match 'Watch-WorkflowRun\.ps1' $script:selfHealingContent | Should -Match 'ops-autoremediate\.yml' + $script:selfHealingContent | Should -Match '\$dispatchInputs = @\(' + $script:selfHealingContent | Should -Match '-Inputs \$dispatchInputs' $script:selfHealingContent | Should -Match 'sync_guard_max_age_hours' $script:selfHealingContent | Should -Match 'already_healthy' $script:selfHealingContent | Should -Match 'remediated' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 74c2a42..1b4b88e 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -58,6 +58,8 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'promotion_source_not_at_head' $script:runtimeContent | Should -Match 'release_tag_range_exhausted' $script:runtimeContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' + $script:runtimeContent | Should -Match '\$dispatchInputs = @\(' + $script:runtimeContent | Should -Match 
'-Inputs \$dispatchInputs' } It 'decouples control-plane runner health gate to release-runner labels' { diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 index fb18339..7f0c480 100644 --- a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -57,6 +57,8 @@ Describe 'Release rollback drill workflow contract' { $script:selfHealingContent | Should -Match 'Invoke-ReleaseRollbackDrill\.ps1' $script:selfHealingContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' $script:selfHealingContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:selfHealingContent | Should -Match '\$dispatchInputs = @\(' + $script:selfHealingContent | Should -Match '-Inputs \$dispatchInputs' $script:selfHealingContent | Should -Match 'release-workspace-installer\.yml' $script:selfHealingContent | Should -Match 'release_channel=canary' $script:selfHealingContent | Should -Match 'allow_existing_tag=false' From d0df22fdd3f6276cb8af9f662f4df27979817d43 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 19:55:34 -0800 Subject: [PATCH 24/60] Flatten workflow dispatch input args for gh CLI --- scripts/lib/WorkflowOps.Common.ps1 | 2 +- ...rkflowOpsCommonInputArgsContract.Tests.ps1 | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tests/WorkflowOpsCommonInputArgsContract.Tests.ps1 diff --git a/scripts/lib/WorkflowOps.Common.ps1 b/scripts/lib/WorkflowOps.Common.ps1 index 85207aa..26f44bb 100644 --- a/scripts/lib/WorkflowOps.Common.ps1 +++ b/scripts/lib/WorkflowOps.Common.ps1 @@ -99,7 +99,7 @@ function Convert-InputPairsToGhArgs { $arguments += @('-f', ("{0}={1}" -f $key, $value)) } - return ,$arguments + return $arguments } function Test-WorkflowRunMatch { diff --git a/tests/WorkflowOpsCommonInputArgsContract.Tests.ps1 b/tests/WorkflowOpsCommonInputArgsContract.Tests.ps1 new file mode 100644 index 
0000000..c3cc48e --- /dev/null +++ b/tests/WorkflowOpsCommonInputArgsContract.Tests.ps1 @@ -0,0 +1,42 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'WorkflowOps.Common input conversion contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:commonPath = Join-Path $script:repoRoot 'scripts/lib/WorkflowOps.Common.ps1' + if (-not (Test-Path -LiteralPath $script:commonPath -PathType Leaf)) { + throw "Common script missing: $script:commonPath" + } + + . $script:commonPath + } + + It 'returns flattened gh args for multiple key=value inputs' { + $result = Convert-InputPairsToGhArgs -Inputs @( + 'release_tag=v0.20260227.1', + 'allow_existing_tag=false' + ) + + $result.Count | Should -Be 4 + $result[0] | Should -Be '-f' + $result[1] | Should -Be 'release_tag=v0.20260227.1' + $result[2] | Should -Be '-f' + $result[3] | Should -Be 'allow_existing_tag=false' + (@($result | Where-Object { $_ -is [System.Array] })).Count | Should -Be 0 + } + + It 'keeps backward-compatible Input alias behavior' { + $result = Convert-InputPairsToGhArgs -Input @('sync_guard_max_age_hours=12') + + $result.Count | Should -Be 2 + $result[0] | Should -Be '-f' + $result[1] | Should -Be 'sync_guard_max_age_hours=12' + } + + It 'fails malformed input pairs deterministically' { + { Convert-InputPairsToGhArgs -Inputs @('release_tag') } | Should -Throw '*input_pair_invalid*' + } +} From b6b4062aa752d1e27e4993844126160848bfb4a3 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:00:12 -0800 Subject: [PATCH 25/60] Fix release preflight runner labels argument binding --- .github/workflows/_release-workspace-installer-core.yml | 2 +- tests/WorkspaceInstallerReleaseContract.Tests.ps1 | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 
f734148..2c9e751 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -62,7 +62,7 @@ jobs: try { & pwsh -NoProfile -File ./scripts/Invoke-OpsMonitoringSnapshot.ps1 ` -SurfaceRepository '${{ github.repository }}' ` - -RequiredRunnerLabels @('self-hosted', 'windows', 'self-hosted-windows-lv') ` + -RequiredRunnerLabelsCsv 'self-hosted,windows,self-hosted-windows-lv' ` -OutputPath $reportPath if ($LASTEXITCODE -ne 0) { throw 'Ops monitoring snapshot returned non-zero exit.' diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index ce55854..be6450c 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -49,6 +49,7 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'name:\s*Release Ops Health Preflight' $script:coreWorkflowContent | Should -Match 'Enforce ops health preflight' $script:coreWorkflowContent | Should -Match 'Invoke-OpsMonitoringSnapshot\.ps1' + $script:coreWorkflowContent | Should -Match 'RequiredRunnerLabelsCsv ''self-hosted,windows,self-hosted-windows-lv''' $script:coreWorkflowContent | Should -Match 'reason_code=ops_unhealthy' $script:coreWorkflowContent | Should -Match '\[ops_unhealthy\]' $script:coreWorkflowContent | Should -Match 'release-ops-health-preflight-\$\{\{\s*github\.run_id\s*\}\}' From 0dc8310ddb822ebcf0bc20532108115e2e310469 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:04:22 -0800 Subject: [PATCH 26/60] Handle runner preflight 403 with deterministic warning path --- .../_release-workspace-installer-core.yml | 73 +++++++++++++------ ...orkspaceInstallerReleaseContract.Tests.ps1 | 3 + 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml 
b/.github/workflows/_release-workspace-installer-core.yml index 2c9e751..93e98d6 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -101,53 +101,84 @@ jobs: $reportPath = Join-Path $env:RUNNER_TEMP 'release-runner-availability-preflight.json' $runnersJson = & gh api "repos/$repo/actions/runners?per_page=100" 2>&1 - if ($LASTEXITCODE -ne 0) { - throw "Failed to list runners for '$repo'. $([string]::Join("`n", @($runnersJson)))" + $runnerApiExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + $runnerVisibility = 'available' + $runnerQueryError = '' + if ($runnerApiExitCode -ne 0) { + $runnerQueryError = [string]::Join("`n", @($runnersJson)) + if ($runnerQueryError -match 'Resource not accessible by integration' -or $runnerQueryError -match 'HTTP 403') { + $runnerVisibility = 'forbidden' + } else { + throw "Failed to list runners for '$repo'. $runnerQueryError" + } } - $runnerPayload = $runnersJson | ConvertFrom-Json -ErrorAction Stop $onlineRunners = @() $eligibleRunners = @() - foreach ($runner in @($runnerPayload.runners)) { - if ([string]$runner.status -ne 'online') { - continue - } + if ($runnerVisibility -eq 'available') { + $runnerPayload = $runnersJson | ConvertFrom-Json -ErrorAction Stop + foreach ($runner in @($runnerPayload.runners)) { + if ([string]$runner.status -ne 'online') { + continue + } - $onlineRunners += [string]$runner.name - $runnerLabels = @{} - foreach ($label in @($runner.labels)) { - $runnerLabels[[string]$label.name.ToLowerInvariant()] = $true - } + $onlineRunners += [string]$runner.name + $runnerLabels = @{} + foreach ($label in @($runner.labels)) { + $runnerLabels[[string]$label.name.ToLowerInvariant()] = $true + } - $missingLabels = @($requiredLabels | Where-Object { -not $runnerLabels.ContainsKey($_) }) - if ($missingLabels.Count -eq 0) { - $eligibleRunners += [ordered]@{ - name = [string]$runner.name - labels = @($runner.labels | 
ForEach-Object { [string]$_.name }) + $missingLabels = @($requiredLabels | Where-Object { -not $runnerLabels.ContainsKey($_) }) + if ($missingLabels.Count -eq 0) { + $eligibleRunners += [ordered]@{ + name = [string]$runner.name + labels = @($runner.labels | ForEach-Object { [string]$_.name }) + } } } } + $status = 'fail' + $reasonCode = 'runner_unavailable' + $remediation = 'Register at least one online self-hosted runner with labels self-hosted, windows, self-hosted-windows-lv.' + if ($runnerVisibility -eq 'forbidden') { + $status = 'warn' + $reasonCode = 'runner_visibility_unavailable' + $remediation = 'Grant token access to list self-hosted runners, or run an out-of-band runner availability check.' + } elseif ($eligibleRunners.Count -gt 0) { + $status = 'pass' + $reasonCode = 'ok' + $remediation = '' + } + $report = [ordered]@{ schema_version = '1.0' repository = $repo generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') required_labels = $requiredLabels + runner_visibility = $runnerVisibility + runner_query_error = $runnerQueryError online_runners = $onlineRunners eligible_runners = $eligibleRunners - status = if ($eligibleRunners.Count -gt 0) { 'pass' } else { 'fail' } - reason_code = if ($eligibleRunners.Count -gt 0) { 'ok' } else { 'runner_unavailable' } - remediation = 'Register at least one online self-hosted runner with labels self-hosted, windows, self-hosted-windows-lv.' + status = $status + reason_code = $reasonCode + remediation = $remediation } $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 - if ($eligibleRunners.Count -gt 0) { + if ($status -eq 'pass') { "reason_code=ok" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 Write-Host "Runner preflight passed. Eligible runners: $($eligibleRunners.Count)." 
exit 0 } + if ($status -eq 'warn') { + "reason_code=runner_visibility_unavailable" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + Write-Warning "[runner_visibility_unavailable] Runner list API is not accessible with current token. Continuing without fail-fast runner gate." + exit 0 + } + "reason_code=runner_unavailable" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 throw "[runner_unavailable] No online runner matched required labels ($($requiredLabels -join ', ')). Remediation: $($report.remediation)" diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index be6450c..199caf2 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -58,6 +58,9 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' $script:coreWorkflowContent | Should -Match 'reason_code=runner_unavailable' $script:coreWorkflowContent | Should -Match '\[runner_unavailable\]' + $script:coreWorkflowContent | Should -Match 'reason_code=runner_visibility_unavailable' + $script:coreWorkflowContent | Should -Match 'runner_visibility' + $script:coreWorkflowContent | Should -Match '\[runner_visibility_unavailable\]' $script:coreWorkflowContent | Should -Match 'release-runner-availability-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Package Workspace Installer' $script:coreWorkflowContent | Should -Match 'needs:\s*\[ops_health_preflight,\s*runner_preflight\]' From e3e62ba170b0b5b3a4e5962d7f838a6aa33b3de4 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:08:06 -0800 Subject: [PATCH 27/60] Introduce composite runner preflight action --- .github/actions/runner-preflight/action.yml | 131 ++++++++++++++++++ .../_release-workspace-installer-core.yml | 94 +------------ 
...orkspaceInstallerReleaseContract.Tests.ps1 | 20 ++- 3 files changed, 150 insertions(+), 95 deletions(-) create mode 100644 .github/actions/runner-preflight/action.yml diff --git a/.github/actions/runner-preflight/action.yml b/.github/actions/runner-preflight/action.yml new file mode 100644 index 0000000..8d17d02 --- /dev/null +++ b/.github/actions/runner-preflight/action.yml @@ -0,0 +1,131 @@ +name: runner-preflight +description: Deterministic self-hosted runner availability preflight with 403 visibility fallback. + +inputs: + repository: + description: owner/repo target for runner visibility. + required: true + required_labels_csv: + description: Comma-delimited required runner labels. + required: true + report_path: + description: Absolute path to write preflight JSON report. + required: true + +outputs: + reason_code: + description: ok | runner_unavailable | runner_visibility_unavailable + value: ${{ steps.check.outputs.reason_code }} + report_path: + description: Output JSON report path. 
+ value: ${{ steps.check.outputs.report_path }} + +runs: + using: composite + steps: + - id: check + name: Evaluate runner availability gate + shell: pwsh + run: | + $ErrorActionPreference = 'Stop' + + $repo = [string]'${{ inputs.repository }}' + if ([string]::IsNullOrWhiteSpace($repo)) { + throw 'repository_required' + } + + $requiredLabels = @( + [string]'${{ inputs.required_labels_csv }}'.Split(',') | + ForEach-Object { ([string]$_).Trim() } | + Where-Object { -not [string]::IsNullOrWhiteSpace($_) } + ) + if ($requiredLabels.Count -eq 0) { + throw 'required_labels_empty' + } + + $reportPath = [string]'${{ inputs.report_path }}' + if ([string]::IsNullOrWhiteSpace($reportPath)) { + throw 'report_path_required' + } + + $runnersJson = & gh api "repos/$repo/actions/runners?per_page=100" 2>&1 + $runnerApiExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + $runnerVisibility = 'available' + $runnerQueryError = '' + if ($runnerApiExitCode -ne 0) { + $runnerQueryError = [string]::Join("`n", @($runnersJson)) + if ($runnerQueryError -match 'Resource not accessible by integration' -or $runnerQueryError -match 'HTTP 403') { + $runnerVisibility = 'forbidden' + } else { + throw "Failed to list runners for '$repo'. 
$runnerQueryError" + } + } + + $onlineRunners = @() + $eligibleRunners = @() + if ($runnerVisibility -eq 'available') { + $runnerPayload = $runnersJson | ConvertFrom-Json -ErrorAction Stop + foreach ($runner in @($runnerPayload.runners)) { + if ([string]$runner.status -ne 'online') { + continue + } + + $onlineRunners += [string]$runner.name + $runnerLabels = @{} + foreach ($label in @($runner.labels)) { + $runnerLabels[[string]$label.name.ToLowerInvariant()] = $true + } + + $missingLabels = @($requiredLabels | Where-Object { -not $runnerLabels.ContainsKey($_) }) + if ($missingLabels.Count -eq 0) { + $eligibleRunners += [ordered]@{ + name = [string]$runner.name + labels = @($runner.labels | ForEach-Object { [string]$_.name }) + } + } + } + } + + $status = 'fail' + $reasonCode = 'runner_unavailable' + $remediation = 'Register at least one online self-hosted runner with the required labels.' + if ($runnerVisibility -eq 'forbidden') { + $status = 'warn' + $reasonCode = 'runner_visibility_unavailable' + $remediation = 'Grant token access to list self-hosted runners, or run an out-of-band runner availability check.' + } elseif ($eligibleRunners.Count -gt 0) { + $status = 'pass' + $reasonCode = 'ok' + $remediation = '' + } + + $report = [ordered]@{ + schema_version = '1.0' + repository = $repo + generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + required_labels = $requiredLabels + runner_visibility = $runnerVisibility + runner_query_error = $runnerQueryError + online_runners = $onlineRunners + eligible_runners = $eligibleRunners + status = $status + reason_code = $reasonCode + remediation = $remediation + } + + $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 + + "reason_code=$reasonCode" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + "report_path=$reportPath" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + + if ($status -eq 'pass') { + Write-Host "Runner preflight passed. 
Eligible runners: $($eligibleRunners.Count)." + exit 0 + } + + if ($status -eq 'warn') { + Write-Warning "[runner_visibility_unavailable] Runner list API is not accessible with current token. Continuing without fail-fast runner gate." + exit 0 + } + + throw "[runner_unavailable] No online runner matched required labels ($($requiredLabels -join ', ')). Remediation: $remediation" diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 93e98d6..e44decb 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -90,97 +90,13 @@ jobs: steps: - id: check name: Validate eligible self-hosted release runner availability - shell: pwsh + uses: ./.github/actions/runner-preflight env: GH_TOKEN: ${{ github.token }} - run: | - $ErrorActionPreference = 'Stop' - - $repo = [string]'${{ github.repository }}' - $requiredLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') - $reportPath = Join-Path $env:RUNNER_TEMP 'release-runner-availability-preflight.json' - - $runnersJson = & gh api "repos/$repo/actions/runners?per_page=100" 2>&1 - $runnerApiExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } - $runnerVisibility = 'available' - $runnerQueryError = '' - if ($runnerApiExitCode -ne 0) { - $runnerQueryError = [string]::Join("`n", @($runnersJson)) - if ($runnerQueryError -match 'Resource not accessible by integration' -or $runnerQueryError -match 'HTTP 403') { - $runnerVisibility = 'forbidden' - } else { - throw "Failed to list runners for '$repo'. 
$runnerQueryError" - } - } - - $onlineRunners = @() - $eligibleRunners = @() - if ($runnerVisibility -eq 'available') { - $runnerPayload = $runnersJson | ConvertFrom-Json -ErrorAction Stop - foreach ($runner in @($runnerPayload.runners)) { - if ([string]$runner.status -ne 'online') { - continue - } - - $onlineRunners += [string]$runner.name - $runnerLabels = @{} - foreach ($label in @($runner.labels)) { - $runnerLabels[[string]$label.name.ToLowerInvariant()] = $true - } - - $missingLabels = @($requiredLabels | Where-Object { -not $runnerLabels.ContainsKey($_) }) - if ($missingLabels.Count -eq 0) { - $eligibleRunners += [ordered]@{ - name = [string]$runner.name - labels = @($runner.labels | ForEach-Object { [string]$_.name }) - } - } - } - } - - $status = 'fail' - $reasonCode = 'runner_unavailable' - $remediation = 'Register at least one online self-hosted runner with labels self-hosted, windows, self-hosted-windows-lv.' - if ($runnerVisibility -eq 'forbidden') { - $status = 'warn' - $reasonCode = 'runner_visibility_unavailable' - $remediation = 'Grant token access to list self-hosted runners, or run an out-of-band runner availability check.' - } elseif ($eligibleRunners.Count -gt 0) { - $status = 'pass' - $reasonCode = 'ok' - $remediation = '' - } - - $report = [ordered]@{ - schema_version = '1.0' - repository = $repo - generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') - required_labels = $requiredLabels - runner_visibility = $runnerVisibility - runner_query_error = $runnerQueryError - online_runners = $onlineRunners - eligible_runners = $eligibleRunners - status = $status - reason_code = $reasonCode - remediation = $remediation - } - - $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 - - if ($status -eq 'pass') { - "reason_code=ok" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - Write-Host "Runner preflight passed. Eligible runners: $($eligibleRunners.Count)." 
- exit 0 - } - - if ($status -eq 'warn') { - "reason_code=runner_visibility_unavailable" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - Write-Warning "[runner_visibility_unavailable] Runner list API is not accessible with current token. Continuing without fail-fast runner gate." - exit 0 - } - - "reason_code=runner_unavailable" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 - throw "[runner_unavailable] No online runner matched required labels ($($requiredLabels -join ', ')). Remediation: $($report.remediation)" + with: + repository: ${{ github.repository }} + required_labels_csv: self-hosted,windows,self-hosted-windows-lv + report_path: ${{ runner.temp }}/release-runner-availability-preflight.json - name: Upload runner availability preflight report if: always() diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 199caf2..2da03c4 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -8,14 +8,19 @@ Describe 'Workspace installer release workflow contract' { $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path $script:wrapperWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-workspace-installer.yml' $script:coreWorkflowPath = Join-Path $script:repoRoot '.github/workflows/_release-workspace-installer-core.yml' + $script:runnerPreflightActionPath = Join-Path $script:repoRoot '.github/actions/runner-preflight/action.yml' if (-not (Test-Path -LiteralPath $script:wrapperWorkflowPath -PathType Leaf)) { throw "Release wrapper workflow not found: $script:wrapperWorkflowPath" } if (-not (Test-Path -LiteralPath $script:coreWorkflowPath -PathType Leaf)) { throw "Release core workflow not found: $script:coreWorkflowPath" } + if (-not (Test-Path -LiteralPath $script:runnerPreflightActionPath -PathType Leaf)) { + throw "Runner preflight action not found: 
$script:runnerPreflightActionPath" + } $script:wrapperWorkflowContent = Get-Content -LiteralPath $script:wrapperWorkflowPath -Raw $script:coreWorkflowContent = Get-Content -LiteralPath $script:coreWorkflowPath -Raw + $script:runnerPreflightActionContent = Get-Content -LiteralPath $script:runnerPreflightActionPath -Raw } It 'keeps dispatch-only wrapper and forwards to release core workflow' { @@ -55,12 +60,15 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'release-ops-health-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Release Runner Availability Preflight' $script:coreWorkflowContent | Should -Match 'Validate eligible self-hosted release runner availability' - $script:coreWorkflowContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' - $script:coreWorkflowContent | Should -Match 'reason_code=runner_unavailable' - $script:coreWorkflowContent | Should -Match '\[runner_unavailable\]' - $script:coreWorkflowContent | Should -Match 'reason_code=runner_visibility_unavailable' - $script:coreWorkflowContent | Should -Match 'runner_visibility' - $script:coreWorkflowContent | Should -Match '\[runner_visibility_unavailable\]' + $script:coreWorkflowContent | Should -Match 'uses:\s*\./\.github/actions/runner-preflight' + $script:coreWorkflowContent | Should -Match 'required_labels_csv:\s*self-hosted,windows,self-hosted-windows-lv' + $script:runnerPreflightActionContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' + $script:runnerPreflightActionContent | Should -Match 'reason_code' + $script:runnerPreflightActionContent | Should -Match 'runner_unavailable' + $script:runnerPreflightActionContent | Should -Match 'runner_visibility_unavailable' + $script:runnerPreflightActionContent | Should -Match 'runner_visibility' + $script:runnerPreflightActionContent | Should -Match '\[runner_unavailable\]' + $script:runnerPreflightActionContent | Should 
-Match '\[runner_visibility_unavailable\]' $script:coreWorkflowContent | Should -Match 'release-runner-availability-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Package Workspace Installer' $script:coreWorkflowContent | Should -Match 'needs:\s*\[ops_health_preflight,\s*runner_preflight\]' From ae7b1c68d2bb4826694530cd8a9a64dc6e5b0163 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:11:04 -0800 Subject: [PATCH 28/60] Allow rollback self-heal for missing release assets --- scripts/Invoke-RollbackDrillSelfHealing.ps1 | 8 +++++++- tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/Invoke-RollbackDrillSelfHealing.ps1 b/scripts/Invoke-RollbackDrillSelfHealing.ps1 index 7d55831..9308377 100644 --- a/scripts/Invoke-RollbackDrillSelfHealing.ps1 +++ b/scripts/Invoke-RollbackDrillSelfHealing.ps1 @@ -236,7 +236,13 @@ try { $report.message = 'Rollback drill failed and auto-remediation is disabled.' 
} else { $initialReasons = @($initialReport.reason_codes | ForEach-Object { [string]$_ }) - $canAutomate = (($initialReasons -contains 'rollback_candidate_missing') -and ([string]$Channel -eq 'canary')) + $canAutomate = ( + ([string]$Channel -eq 'canary') -and + ( + ($initialReasons -contains 'rollback_candidate_missing') -or + ($initialReasons -contains 'rollback_assets_missing') + ) + ) if (-not $canAutomate) { $report.status = 'fail' $report.reason_code = 'no_automatable_action' diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 index 7f0c480..2b753d4 100644 --- a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -63,6 +63,7 @@ Describe 'Release rollback drill workflow contract' { $script:selfHealingContent | Should -Match 'release_channel=canary' $script:selfHealingContent | Should -Match 'allow_existing_tag=false' $script:selfHealingContent | Should -Match 'rollback_candidate_missing' + $script:selfHealingContent | Should -Match 'rollback_assets_missing' $script:selfHealingContent | Should -Match 'already_ready' $script:selfHealingContent | Should -Match 'remediated' $script:selfHealingContent | Should -Match 'no_automatable_action' From 930e01d5424b7dd7844091408b48f32869e0c590 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:14:30 -0800 Subject: [PATCH 29/60] Add checkout before local runner-preflight action --- .github/workflows/_release-workspace-installer-core.yml | 3 +++ tests/WorkspaceInstallerReleaseContract.Tests.ps1 | 1 + 2 files changed, 4 insertions(+) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index e44decb..8853869 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -88,6 +88,9 @@ jobs: outputs: reason_code: ${{ 
steps.check.outputs.reason_code }} steps: + - name: Checkout + uses: actions/checkout@v4 + - id: check name: Validate eligible self-hosted release runner availability uses: ./.github/actions/runner-preflight diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 2da03c4..534736a 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -60,6 +60,7 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'release-ops-health-preflight-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match 'name:\s*Release Runner Availability Preflight' $script:coreWorkflowContent | Should -Match 'Validate eligible self-hosted release runner availability' + $script:coreWorkflowContent | Should -Match '(?s)runner_preflight:.*?-\s*name:\s*Checkout.*?actions/checkout@v4.*?-\s*id:\s*check' $script:coreWorkflowContent | Should -Match 'uses:\s*\./\.github/actions/runner-preflight' $script:coreWorkflowContent | Should -Match 'required_labels_csv:\s*self-hosted,windows,self-hosted-windows-lv' $script:runnerPreflightActionContent | Should -Match 'repos/\$repo/actions/runners\?per_page=100' From 6de1ba9cd2d21b44f3e93fa412bfc93c530f7309 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:21:22 -0800 Subject: [PATCH 30/60] Fix immutable release publish path for new tags --- .../_release-workspace-installer-core.yml | 25 +++++++++++-------- ...orkspaceInstallerReleaseContract.Tests.ps1 | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 8853869..8f8f7fa 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -546,14 +546,15 @@ jobs: } $releaseTitle = "Workspace 
Installer $releaseTag" + $releaseAssets = @($assetPath, $shaPath, $reproPath, $spdxPath, $slsaPath, $releaseManifestPath) if (-not $releaseExists) { if ($prerelease) { - & gh release create $releaseTag -R $repo --target $releaseTargetSha --title $releaseTitle --notes-file $releaseNotesPath --prerelease + & gh release create $releaseTag $releaseAssets -R $repo --target $releaseTargetSha --title $releaseTitle --notes-file $releaseNotesPath --prerelease } else { - & gh release create $releaseTag -R $repo --target $releaseTargetSha --title $releaseTitle --notes-file $releaseNotesPath + & gh release create $releaseTag $releaseAssets -R $repo --target $releaseTargetSha --title $releaseTitle --notes-file $releaseNotesPath } - if ($LASTEXITCODE -ne 0) { throw "Failed to create release '$releaseTag' for '$repo'." } + if ($LASTEXITCODE -ne 0) { throw "Failed to create release '$releaseTag' for '$repo' with assets." } } else { if ($prerelease) { & gh release edit $releaseTag -R $repo --title $releaseTitle --notes-file $releaseNotesPath --prerelease @@ -561,13 +562,15 @@ jobs: & gh release edit $releaseTag -R $repo --title $releaseTitle --notes-file $releaseNotesPath } if ($LASTEXITCODE -ne 0) { throw "Failed to edit release '$releaseTag' for '$repo'." } - } - if ($allowExistingTag) { - & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath $releaseManifestPath -R $repo --clobber - } else { - & gh release upload $releaseTag $assetPath $shaPath $reproPath $spdxPath $slsaPath $releaseManifestPath -R $repo - } - if ($LASTEXITCODE -ne 0) { - throw "Failed to upload release assets for '$releaseTag'." + if ($allowExistingTag) { + $uploadOutput = & gh release upload $releaseTag $releaseAssets -R $repo --clobber 2>&1 + if ($LASTEXITCODE -ne 0) { + $uploadText = ($uploadOutput | Out-String).Trim() + if ($uploadText -match '(?i)immutable release') { + throw "[release_immutable] Release tag '$releaseTag' is immutable and cannot be overwritten. 
Publish a new semantic tag." + } + throw "Failed to upload release assets for '$releaseTag': $uploadText" + } + } } diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 534736a..0bbc487 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -98,12 +98,14 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'Parity artifact path was selected for release publish input' $script:coreWorkflowContent | Should -Match 'workspace-installer-release-\$\{\{\s*github\.run_id\s*\}\}' $script:coreWorkflowContent | Should -Match '(gh release create|''release'',\s*''create'')' + $script:coreWorkflowContent | Should -Match '\$releaseAssets = @\(\$assetPath, \$shaPath, \$reproPath, \$spdxPath, \$slsaPath, \$releaseManifestPath\)' $script:coreWorkflowContent | Should -Match '--target \$releaseTargetSha' $script:coreWorkflowContent | Should -Match 'RELEASE_TARGET_SHA:\s*\$\{\{\s*github\.sha\s*\}\}' $script:coreWorkflowContent | Should -Match 'already exists' $script:coreWorkflowContent | Should -Match 'allow_existing_tag=true' $script:coreWorkflowContent | Should -Match 'gh release upload' $script:coreWorkflowContent | Should -Match '--clobber' + $script:coreWorkflowContent | Should -Match '\[release_immutable\]' } It 'enforces release notes, tag validation, and override disclosure support' { From 557bd931ff1e0cdca054cbcf4bf1a675f318bb18 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:35:36 -0800 Subject: [PATCH 31/60] Add dual-mode SemVer migration for release tags and hygiene --- .../_release-workspace-installer-core.yml | 165 ++++++++++++++++-- .../workflows/canary-smoke-tag-hygiene.yml | 14 ++ .../workflows/release-with-windows-gate.yml | 2 +- .../workflows/release-workspace-installer.yml | 2 +- AGENTS.md | 12 +- README.md | 14 +- scripts/Invoke-CanarySmokeTagHygiene.ps1 | 
141 +++++++++++---- scripts/Invoke-ReleaseControlPlane.ps1 | 4 + ...ySmokeTagHygieneWorkflowContract.Tests.ps1 | 9 + ...easeControlPlaneWorkflowContract.Tests.ps1 | 2 + ...orkspaceInstallerReleaseContract.Tests.ps1 | 6 +- 11 files changed, 311 insertions(+), 60 deletions(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 8f8f7fa..6fc4245 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -4,7 +4,7 @@ on: workflow_call: inputs: release_tag: - description: Release tag to publish (for example, v0.1.0). + description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). required: true type: string allow_existing_tag: @@ -168,14 +168,74 @@ jobs: run: | $ErrorActionPreference = 'Stop' + $releaseTag = [string]'${{ inputs.release_tag }}' + if ([string]::IsNullOrWhiteSpace($releaseTag)) { + throw 'release_tag is required.' + } + + $legacyTagPattern = '^v0\.(?<date>\d{8})\.(?<sequence>\d{1,3})$' + $semverTagPattern = '^v(?<major>0|[1-9]\d*)\.(?<minor>0|[1-9]\d*)\.(?<patch>0|[1-9]\d*)(?:-(?<prerelease>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' + $legacyMatch = [regex]::Match($releaseTag, $legacyTagPattern) + $semverMatch = [regex]::Match($releaseTag, $semverTagPattern) + + $tagFamily = '' + $channelFromTag = '' + $tagImpliesPrerelease = $false + $tagMigrationWarning = '' + + if ($legacyMatch.Success) { + $sequence = [int]$legacyMatch.Groups['sequence'].Value + if ($sequence -ge 1 -and $sequence -le 49) { + $channelFromTag = 'canary' + $tagImpliesPrerelease = $true + } elseif ($sequence -ge 50 -and $sequence -le 79) { + $channelFromTag = 'prerelease' + $tagImpliesPrerelease = $true + } elseif ($sequence -ge 80 -and $sequence -le 99) { + $channelFromTag = 'stable' + $tagImpliesPrerelease = $false + } else { + throw "Invalid legacy release_tag '$releaseTag'. 
Sequence must be within 1-99." + } + + $tagFamily = 'legacy_date_window' + $tagMigrationWarning = "Legacy date-window tag '$releaseTag' is accepted during migration. Prefer SemVer tags (stable: vX.Y.Z, prerelease: vX.Y.Z-rc.N, canary: vX.Y.Z-canary.N)." + } elseif ($semverMatch.Success) { + $tagFamily = 'semver' + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + if ([string]::IsNullOrWhiteSpace($prereleaseLabel)) { + $channelFromTag = 'stable' + $tagImpliesPrerelease = $false + } else { + $tagImpliesPrerelease = $true + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channelFromTag = 'canary' + } else { + $channelFromTag = 'prerelease' + } + } + } else { + throw "Invalid release_tag '$releaseTag'. Accepted formats: SemVer (vX.Y.Z, vX.Y.Z-rc.N, vX.Y.Z-canary.N) or legacy migration format (v0.YYYYMMDD.N)." + } + $releaseChannel = [string]'${{ inputs.release_channel }}' $isPrereleaseInput = [System.Convert]::ToBoolean([string]'${{ inputs.prerelease }}') + if ($isPrereleaseInput -ne $tagImpliesPrerelease) { + throw "[channel_tag_mismatch] prerelease input '$isPrereleaseInput' does not match release_tag '$releaseTag' (implied prerelease=$tagImpliesPrerelease)." + } + if ([string]::IsNullOrWhiteSpace($releaseChannel)) { $releaseChannel = if ($isPrereleaseInput) { 'prerelease' } else { 'stable' } } if ($releaseChannel -notin @('stable', 'prerelease', 'canary')) { throw "Unsupported release channel '$releaseChannel'. Expected stable, prerelease, or canary." } + if ($releaseChannel -ne $channelFromTag) { + throw "[channel_tag_mismatch] release_channel '$releaseChannel' does not match release_tag '$releaseTag' (implied channel=$channelFromTag)." 
+ } + if (-not [string]::IsNullOrWhiteSpace($tagMigrationWarning)) { + Write-Warning "[tag_migration_warning] $tagMigrationWarning" + } $assetName = 'lvie-cdev-workspace-installer.exe' $releaseRoot = Join-Path $env:RUNNER_TEMP 'workspace-installer-release' @@ -353,7 +413,7 @@ jobs: $releaseManifestPath = Join-Path $releaseRoot 'release-manifest.json' & pwsh -NoProfile -File (Join-Path $env:GITHUB_WORKSPACE 'scripts/Write-ReleaseManifest.ps1') ` -Repository '${{ github.repository }}' ` - -ReleaseTag '${{ inputs.release_tag }}' ` + -ReleaseTag $releaseTag ` -Channel $releaseChannel ` -InstallerPath $assetPath ` -InstallerSha256 $assetSha ` @@ -374,7 +434,12 @@ jobs: asset_name = $assetName asset_path = $assetPath asset_sha256 = $assetSha + release_tag = $releaseTag release_channel = $releaseChannel + channel_from_tag = $channelFromTag + tag_family = $tagFamily + tag_implies_prerelease = $tagImpliesPrerelease + tag_migration_warning = $tagMigrationWarning install_command = 'lvie-cdev-workspace-installer.exe /S' repro_report = (Join-Path $reproRoot 'workspace-installer-determinism-summary.json') spdx_path = (Join-Path $provRoot 'workspace-installer.spdx.json') @@ -438,8 +503,67 @@ jobs: $ErrorActionPreference = 'Stop' $releaseTag = [string]$env:RELEASE_TAG - if ($releaseTag -notmatch '^v[0-9]+\.[0-9]+\.[0-9]+$') { - throw "Invalid release_tag '$releaseTag'. Expected semantic tag like v0.1.0." + if ([string]::IsNullOrWhiteSpace($releaseTag)) { + throw 'release_tag is required.' 
+ } + + $allowExistingTag = $false + if (-not [string]::IsNullOrWhiteSpace($env:ALLOW_EXISTING_TAG)) { + $allowExistingTag = [System.Convert]::ToBoolean($env:ALLOW_EXISTING_TAG) + } + $prerelease = $false + if (-not [string]::IsNullOrWhiteSpace($env:PRERELEASE)) { + $prerelease = [System.Convert]::ToBoolean($env:PRERELEASE) + } + $overrideApplied = $false + if (-not [string]::IsNullOrWhiteSpace($env:OVERRIDE_APPLIED)) { + $overrideApplied = [System.Convert]::ToBoolean($env:OVERRIDE_APPLIED) + } + + $legacyTagPattern = '^v0\.(?<date>\d{8})\.(?<sequence>\d{1,3})$' + $semverTagPattern = '^v(?<major>0|[1-9]\d*)\.(?<minor>0|[1-9]\d*)\.(?<patch>0|[1-9]\d*)(?:-(?<prerelease>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' + $legacyMatch = [regex]::Match($releaseTag, $legacyTagPattern) + $semverMatch = [regex]::Match($releaseTag, $semverTagPattern) + + $channelFromTag = '' + $tagImpliesPrerelease = $false + $tagMigrationWarning = '' + + if ($legacyMatch.Success) { + $sequence = [int]$legacyMatch.Groups['sequence'].Value + if ($sequence -ge 1 -and $sequence -le 49) { + $channelFromTag = 'canary' + $tagImpliesPrerelease = $true + } elseif ($sequence -ge 50 -and $sequence -le 79) { + $channelFromTag = 'prerelease' + $tagImpliesPrerelease = $true + } elseif ($sequence -ge 80 -and $sequence -le 99) { + $channelFromTag = 'stable' + $tagImpliesPrerelease = $false + } else { + throw "Invalid legacy release_tag '$releaseTag'. Sequence must be within 1-99." + } + + $tagMigrationWarning = "Legacy date-window tag '$releaseTag' is accepted during migration. Prefer SemVer tags (stable: vX.Y.Z, prerelease: vX.Y.Z-rc.N, canary: vX.Y.Z-canary.N)." 
+ } elseif ($semverMatch.Success) { + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + if ([string]::IsNullOrWhiteSpace($prereleaseLabel)) { + $channelFromTag = 'stable' + $tagImpliesPrerelease = $false + } else { + $tagImpliesPrerelease = $true + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channelFromTag = 'canary' + } else { + $channelFromTag = 'prerelease' + } + } + } else { + throw "Invalid release_tag '$releaseTag'. Accepted formats: SemVer (vX.Y.Z, vX.Y.Z-rc.N, vX.Y.Z-canary.N) or legacy migration format (v0.YYYYMMDD.N)." + } + + if ($prerelease -ne $tagImpliesPrerelease) { + throw "[channel_tag_mismatch] prerelease input '$prerelease' does not match release_tag '$releaseTag' (implied prerelease=$tagImpliesPrerelease)." } $artifactRoot = Join-Path $env:RUNNER_TEMP 'workspace-installer-release' @@ -468,6 +592,15 @@ jobs: if ([string]::IsNullOrWhiteSpace($releaseChannel)) { $releaseChannel = if ($prerelease) { 'prerelease' } else { 'stable' } } + if ($releaseChannel -notin @('stable', 'prerelease', 'canary')) { + throw "Unsupported release channel '$releaseChannel'. Expected stable, prerelease, or canary." + } + if ($releaseChannel -ne $channelFromTag) { + throw "[channel_tag_mismatch] release_channel '$releaseChannel' does not match release_tag '$releaseTag' (implied channel=$channelFromTag)." + } + if (-not [string]::IsNullOrWhiteSpace($tagMigrationWarning)) { + Write-Warning "[tag_migration_warning] $tagMigrationWarning" + } if ([string]::IsNullOrWhiteSpace($assetName) -or [string]::IsNullOrWhiteSpace($assetSha)) { throw "Installer metadata is missing required fields." 
} @@ -476,19 +609,6 @@ jobs: throw "Parity artifact path was selected for release publish input: $assetPathNormalized" } - $allowExistingTag = $false - if (-not [string]::IsNullOrWhiteSpace($env:ALLOW_EXISTING_TAG)) { - $allowExistingTag = [System.Convert]::ToBoolean($env:ALLOW_EXISTING_TAG) - } - $prerelease = $false - if (-not [string]::IsNullOrWhiteSpace($env:PRERELEASE)) { - $prerelease = [System.Convert]::ToBoolean($env:PRERELEASE) - } - $overrideApplied = $false - if (-not [string]::IsNullOrWhiteSpace($env:OVERRIDE_APPLIED)) { - $overrideApplied = [System.Convert]::ToBoolean($env:OVERRIDE_APPLIED) - } - $releaseNotesPath = Join-Path $env:RUNNER_TEMP "release-notes-$releaseTag.md" $releaseNoteLines = @( "# Workspace Installer $releaseTag" @@ -523,6 +643,13 @@ jobs: "- Incident: $env:OVERRIDE_INCIDENT_URL" ) } + if (-not [string]::IsNullOrWhiteSpace($tagMigrationWarning)) { + $releaseNoteLines += @( + "" + "Tag Policy Notice:" + "- $tagMigrationWarning" + ) + } $releaseNoteLines | Set-Content -LiteralPath $releaseNotesPath -Encoding utf8 $repo = [string]$env:TARGET_REPOSITORY @@ -542,7 +669,7 @@ jobs: if ($releaseExists -and -not $allowExistingTag) { $publishedAt = [string]$existingReleaseMetadata.publishedAt $releaseUrl = [string]$existingReleaseMetadata.url - throw "Release tag '$releaseTag' already exists (publishedAt=$publishedAt, url=$releaseUrl). Use a new semantic tag or set allow_existing_tag=true for break-glass overwrite." + throw "Release tag '$releaseTag' already exists (publishedAt=$publishedAt, url=$releaseUrl). Use a new release tag or set allow_existing_tag=true for break-glass overwrite." } $releaseTitle = "Workspace Installer $releaseTag" @@ -568,7 +695,7 @@ jobs: if ($LASTEXITCODE -ne 0) { $uploadText = ($uploadOutput | Out-String).Trim() if ($uploadText -match '(?i)immutable release') { - throw "[release_immutable] Release tag '$releaseTag' is immutable and cannot be overwritten. Publish a new semantic tag." 
+ throw "[release_immutable] Release tag '$releaseTag' is immutable and cannot be overwritten. Publish a new release tag." } throw "Failed to upload release assets for '$releaseTag': $uploadText" } diff --git a/.github/workflows/canary-smoke-tag-hygiene.yml b/.github/workflows/canary-smoke-tag-hygiene.yml index 68294e3..1f39720 100644 --- a/.github/workflows/canary-smoke-tag-hygiene.yml +++ b/.github/workflows/canary-smoke-tag-hygiene.yml @@ -10,6 +10,11 @@ on: required: false default: '' type: string + tag_family: + description: Tag family scope for hygiene (auto, legacy_date_window, semver). + required: false + default: auto + type: string keep_latest_n: description: Number of latest canary smoke tags to keep for the target date. required: false @@ -45,6 +50,14 @@ jobs: $targetDate = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') } + $tagFamily = [string]'${{ inputs.tag_family }}' + if ([string]::IsNullOrWhiteSpace($tagFamily)) { + $tagFamily = 'auto' + } + if ($tagFamily -notin @('auto', 'legacy_date_window', 'semver')) { + throw "tag_family must be one of: auto, legacy_date_window, semver. actual='$tagFamily'" + } + $keepLatestNText = [string]'${{ inputs.keep_latest_n }}' $keepLatestN = 1 if (-not [string]::IsNullOrWhiteSpace($keepLatestNText)) { @@ -64,6 +77,7 @@ jobs: & pwsh -NoProfile -File ./scripts/Invoke-CanarySmokeTagHygiene.ps1 ` -Repository '${{ github.repository }}' ` -DateUtc $targetDate ` + -TagFamily $tagFamily ` -KeepLatestN $keepLatestN ` -Delete:$applyChanges ` -OutputPath $reportPath diff --git a/.github/workflows/release-with-windows-gate.yml b/.github/workflows/release-with-windows-gate.yml index 67be5a5..9b13dd0 100644 --- a/.github/workflows/release-with-windows-gate.yml +++ b/.github/workflows/release-with-windows-gate.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: release_tag: - description: Release tag to publish (for example, v0.1.0). 
+ description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). required: true type: string allow_existing_tag: diff --git a/.github/workflows/release-workspace-installer.yml b/.github/workflows/release-workspace-installer.yml index 4d00d8b..984fbe0 100644 --- a/.github/workflows/release-workspace-installer.yml +++ b/.github/workflows/release-workspace-installer.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: release_tag: - description: Release tag to publish (for example, v0.1.0). + description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). required: true type: string allow_existing_tag: diff --git a/AGENTS.md b/AGENTS.md index 31e164c..43c31db 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,8 +77,12 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Override path must emit explicit warning summary and append override disclosure to release notes. - `.github/workflows/release-workspace-installer.yml` is retained as a dispatch wrapper for diagnostics/fallback and must call `./.github/workflows/_release-workspace-installer-core.yml`. - `.github/workflows/windows-labview-image-gate.yml` is retained as a dispatch wrapper for diagnostics/fallback and must call `./.github/workflows/_windows-labview-image-gate-core.yml`. -- Publishing mode is manual dispatch only with explicit semantic tag input (`v<major>.<minor>.<patch>`). +- Publishing mode is manual dispatch only with dual-mode tag support: + - preferred SemVer tags (`v<major>.<minor>.<patch>`, `v<major>.<minor>.<patch>-rc.<n>`, `v<major>.<minor>.<patch>-canary.<n>`) + - legacy migration tags (`v0.YYYYMMDD.N`) - Release channel metadata is supported via `release_channel` input (`stable`, `prerelease`, `canary`); default is derived from `prerelease`. +- Release workflow must enforce deterministic channel/tag consistency and fail with `[channel_tag_mismatch]` when `release_tag`, `prerelease`, and `release_channel` disagree.
+- Release workflow must emit deterministic `[tag_migration_warning]` when legacy date-window tags are used. - Release tags are immutable by default: existing tags must fail publication unless `allow_existing_tag=true` is explicitly set for break-glass recovery. - Release creation must bind tag creation to the exact workflow commit SHA (`github.sha`), not a moving branch target. - Keep fork-first mutation rules when preparing release changes: @@ -197,7 +201,10 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `scripts/Exercise-ReleaseControlPlaneLocal.ps1` - `.github/workflows/ops-monitoring.yml` remains strict-default and must keep Docker Desktop parity visibility labels in its default snapshot path (`windows-containers`, `user-session`, `cdev-surface-windows-gate`). - `.github/workflows/canary-smoke-tag-hygiene.yml` is the canary smoke tag retention workflow. -- It must run `scripts/Invoke-CanarySmokeTagHygiene.ps1` and enforce deterministic keep-latest behavior for tags matching `v0.YYYYMMDD.N`. +- It must run `scripts/Invoke-CanarySmokeTagHygiene.ps1` and enforce deterministic keep-latest behavior for dual-mode canary tags: + - legacy date-window tags (`v0.YYYYMMDD.N`) + - SemVer canary tags (`vX.Y.Z-canary.N`) +- Hygiene workflow default mode must be `auto` so both tag families are processed during migration. - `.github/workflows/ops-autoremediate.yml` is the deterministic remediation workflow and must run `scripts/Invoke-OpsAutoRemediation.ps1`. - Auto-remediation reason codes must remain explicit: - `already_healthy` @@ -216,6 +223,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - canary: `1-49` - prerelease: `50-79` - stable: `80-99` +- Release-control-plane currently emits legacy date-window tags and must include deterministic migration warnings in execution reports. - Promotion must gate on source release integrity (required assets + source commit equals branch head). 
- `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. - `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. diff --git a/README.md b/README.md index 9de48d1..a47bc42 100644 --- a/README.md +++ b/README.md @@ -250,10 +250,13 @@ Publish the Linux parity image to GHCR with deterministic tags: Use manual workflow dispatch for release publication: 1. Run `.github/workflows/release-with-windows-gate.yml`. -2. Provide a new `release_tag` in semantic format (for example, `v0.1.1`). +2. Provide a new `release_tag`: + - Preferred SemVer: `vX.Y.Z` (stable), `vX.Y.Z-rc.N` (prerelease), `vX.Y.Z-canary.N` (canary). + - Legacy migration compatibility: `v0.YYYYMMDD.N`. 3. Keep `allow_existing_tag=false` (default). Set `true` only for break-glass overwrite operations. -4. Set `prerelease` as needed. +4. Set `prerelease` to match the tag family (`true` for prerelease/canary tags, `false` for stable tags). 5. Keep `allow_gate_override=false` (default). +6. Set `release_channel` explicitly for canary tags (`canary`) to satisfy channel/tag consistency checks. Automated flow: 1. `repo_guard` verifies release runs only in `LabVIEW-Community-CI-CD/labview-cdev-surface`. @@ -366,7 +369,10 @@ Incident lifecycle is deterministic and shared by ops workflows via `scripts/Inv Every run uploads `ops-monitoring-report.json`. -`canary-smoke-tag-hygiene.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-CanarySmokeTagHygiene.ps1` to keep latest `v0.YYYYMMDD.N` canary smoke tag(s) for a UTC date and delete older tags deterministically. +`canary-smoke-tag-hygiene.yml` is scheduled daily and supports manual dispatch. It runs `scripts/Invoke-CanarySmokeTagHygiene.ps1` in dual-mode: +- `legacy_date_window`: keeps latest `v0.YYYYMMDD.N` canary smoke tag(s) for the selected UTC date. 
+- `semver`: keeps latest SemVer canary tags (`vX.Y.Z-canary.N`). +- `auto` (default): applies both policies in one deterministic pass. `ops-autoremediate.yml` is scheduled hourly and supports manual dispatch. It runs `scripts/Invoke-OpsAutoRemediation.ps1` to: - auto-dispatch and verify cdev-cli sync-guard when sync drift is detected @@ -382,7 +388,7 @@ Every run uploads `ops-monitoring-report.json`. Control-plane behavior: 1. Runs ops health gate and optional auto-remediation. -2. Dispatches release workflow with deterministic channel-specific tag windows (`canary=1-49`, `prerelease=50-79`, `stable=80-99` for `v0.YYYYMMDD.N`). +2. Dispatches release workflow with deterministic channel-specific legacy tag windows (`canary=1-49`, `prerelease=50-79`, `stable=80-99` for `v0.YYYYMMDD.N`) and emits deterministic migration warnings. 3. Verifies run completion. 4. Applies canary smoke tag hygiene after canary publish. diff --git a/scripts/Invoke-CanarySmokeTagHygiene.ps1 b/scripts/Invoke-CanarySmokeTagHygiene.ps1 index 7cd8af6..f91bc6d 100644 --- a/scripts/Invoke-CanarySmokeTagHygiene.ps1 +++ b/scripts/Invoke-CanarySmokeTagHygiene.ps1 @@ -17,6 +17,14 @@ param( [ValidateNotNullOrEmpty()] [string]$CanaryTagRegex = '^v0\.(?\d{8})\.(?\d+)$', + [Parameter()] + [ValidateNotNullOrEmpty()] + [string]$SemverCanaryTagRegex = '^v(?0|[1-9]\d*)\.(?0|[1-9]\d*)\.(?0|[1-9]\d*)-(?[0-9A-Za-z-]*(?i:canary)[0-9A-Za-z-]*(?:\.[0-9A-Za-z-]+)*)(?:\+(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$', + + [Parameter()] + [ValidateSet('auto', 'legacy_date_window', 'semver')] + [string]$TagFamily = 'auto', + [Parameter()] [bool]$RequirePrerelease = $true, @@ -41,7 +49,9 @@ $report = [ordered]@{ timestamp_utc = Get-UtcNowIso repository = $Repository target_date_utc = $DateUtc - canary_tag_regex = $CanaryTagRegex + tag_family_mode = $TagFamily + legacy_canary_tag_regex = $CanaryTagRegex + semver_canary_tag_regex = $SemverCanaryTagRegex require_prerelease = $RequirePrerelease keep_latest_n = $KeepLatestN 
delete_enabled = [bool]$Delete @@ -51,56 +61,98 @@ $report = [ordered]@{ message = '' releases_scanned = 0 candidate_count = 0 + legacy_candidate_count = 0 + semver_candidate_count = 0 kept_tags = @() delete_candidates = @() deleted_tags = @() + migration_warnings = @() } try { $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) $report.releases_scanned = @($releaseList).Count - $candidates = @() + $legacyCandidates = @() + $semverCandidates = @() foreach ($release in $releaseList) { $tagName = [string]$release.tagName if ([string]::IsNullOrWhiteSpace($tagName)) { continue } - $match = [regex]::Match($tagName, $CanaryTagRegex) - if (-not $match.Success) { - continue - } - - $tagDate = [string]$match.Groups['date'].Value - if ($tagDate -ne $DateUtc) { - continue + $isPrerelease = [bool]$release.isPrerelease + $publishedAt = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$release.publishedAt, [ref]$publishedAt) + $publishedAtUtcText = if ($publishedAt -eq [DateTimeOffset]::MinValue) { '' } else { $publishedAt.ToUniversalTime().ToString('o') } + + $legacyMatch = [regex]::Match($tagName, $CanaryTagRegex) + if ($legacyMatch.Success) { + $tagDate = [string]$legacyMatch.Groups['date'].Value + if ($tagDate -eq $DateUtc) { + $sequenceText = [string]$legacyMatch.Groups['sequence'].Value + $sequence = 0 + if ([int]::TryParse($sequenceText, [ref]$sequence)) { + if (-not $RequirePrerelease -or $isPrerelease) { + $legacyCandidates += [ordered]@{ + tag_name = $tagName + tag_family = 'legacy_date_window' + sequence = $sequence + major = -1 + minor = -1 + patch = -1 + is_prerelease = $isPrerelease + published_at_utc = $publishedAtUtcText + } + } + } + } } - $sequenceText = [string]$match.Groups['sequence'].Value - $sequence = 0 - if (-not [int]::TryParse($sequenceText, [ref]$sequence)) { - continue + $semverMatch = [regex]::Match($tagName, $SemverCanaryTagRegex) + if ($semverMatch.Success) { + if (-not $RequirePrerelease 
-or $isPrerelease) { + $semverCandidates += [ordered]@{ + tag_name = $tagName + tag_family = 'semver' + sequence = -1 + major = [int]$semverMatch.Groups['major'].Value + minor = [int]$semverMatch.Groups['minor'].Value + patch = [int]$semverMatch.Groups['patch'].Value + is_prerelease = $isPrerelease + published_at_utc = $publishedAtUtcText + } + } } + } - $isPrerelease = [bool]$release.isPrerelease - if ($RequirePrerelease -and -not $isPrerelease) { - continue - } + $report.legacy_candidate_count = @($legacyCandidates).Count + $report.semver_candidate_count = @($semverCandidates).Count - $publishedAt = [DateTimeOffset]::MinValue - [void][DateTimeOffset]::TryParse([string]$release.publishedAt, [ref]$publishedAt) + if (@($legacyCandidates).Count -gt 0) { + $report.migration_warnings += "Legacy date-window canary tags were detected for date '$DateUtc'. SemVer canary tags are preferred." + } + if ($TagFamily -eq 'auto' -and @($legacyCandidates).Count -gt 0 -and @($semverCandidates).Count -gt 0) { + $report.migration_warnings += "Dual-mode hygiene processed both legacy_date_window and semver canary tags." 
+ } - $candidates += [ordered]@{ - tag_name = $tagName - sequence = $sequence - is_prerelease = $isPrerelease - published_at_utc = if ($publishedAt -eq [DateTimeOffset]::MinValue) { '' } else { $publishedAt.ToUniversalTime().ToString('o') } + $selectedLegacyCandidates = @() + $selectedSemverCandidates = @() + switch ($TagFamily) { + 'legacy_date_window' { + $selectedLegacyCandidates = @($legacyCandidates) + } + 'semver' { + $selectedSemverCandidates = @($semverCandidates) + } + default { + $selectedLegacyCandidates = @($legacyCandidates) + $selectedSemverCandidates = @($semverCandidates) } } - $orderedCandidates = @( - $candidates | Sort-Object ` + $orderedLegacyCandidates = @( + $selectedLegacyCandidates | Sort-Object ` @{ Expression = { [int]$_.sequence }; Descending = $true }, ` @{ Expression = { $parsed = [DateTimeOffset]::MinValue @@ -109,16 +161,41 @@ try { }; Descending = $true }, ` @{ Expression = { [string]$_.tag_name }; Descending = $false } ) + $orderedSemverCandidates = @( + $selectedSemverCandidates | Sort-Object ` + @{ Expression = { [int]$_.major }; Descending = $true }, ` + @{ Expression = { [int]$_.minor }; Descending = $true }, ` + @{ Expression = { [int]$_.patch }; Descending = $true }, ` + @{ Expression = { + $parsed = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$_.published_at_utc, [ref]$parsed) + $parsed + }; Descending = $true }, ` + @{ Expression = { [string]$_.tag_name }; Descending = $false } + ) + + $orderedCandidates = @($orderedLegacyCandidates + $orderedSemverCandidates) $report.candidate_count = @($orderedCandidates).Count if (@($orderedCandidates).Count -eq 0) { $report.status = 'pass' $report.reason_code = 'no_matching_tags' - $report.message = "No canary releases matched date '$DateUtc'." + if ($TagFamily -eq 'legacy_date_window') { + $report.message = "No legacy canary releases matched date '$DateUtc'." 
+ } elseif ($TagFamily -eq 'semver') { + $report.message = 'No SemVer canary releases matched hygiene policy.' + } else { + $report.message = "No canary releases matched hygiene policy for mode '$TagFamily'." + } } else { - $kept = @($orderedCandidates | Select-Object -First $KeepLatestN) - $deleteCandidates = @($orderedCandidates | Select-Object -Skip $KeepLatestN) + $keptLegacy = @($orderedLegacyCandidates | Select-Object -First $KeepLatestN) + $deleteLegacy = @($orderedLegacyCandidates | Select-Object -Skip $KeepLatestN) + $keptSemver = @($orderedSemverCandidates | Select-Object -First $KeepLatestN) + $deleteSemver = @($orderedSemverCandidates | Select-Object -Skip $KeepLatestN) + + $kept = @($keptLegacy + $keptSemver) + $deleteCandidates = @($deleteLegacy + $deleteSemver) $report.kept_tags = @($kept) $report.delete_candidates = @($deleteCandidates) @@ -147,11 +224,11 @@ try { $report.deleted_tags = @($deleted) $report.status = 'pass' $report.reason_code = 'applied' - $report.message = "Deleted $(@($deleted).Count) stale canary release tags for date '$DateUtc'." + $report.message = "Deleted $(@($deleted).Count) stale canary release tags for mode '$TagFamily'." } else { $report.status = 'pass' $report.reason_code = 'dry_run' - $report.message = "Dry-run only. $(@($deleteCandidates).Count) stale canary tags would be deleted for date '$DateUtc'." + $report.message = "Dry-run only. $(@($deleteCandidates).Count) stale canary tags would be deleted for mode '$TagFamily'." } } } diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index ad8bbba..0a518fb 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -249,11 +249,15 @@ function Invoke-ReleaseMode { } $targetTag = "v0.$DateKey.$nextSequence" + $tagMigrationWarning = "Control-plane generated legacy date-window tag '$targetTag'. 
Prefer SemVer tags for manual dispatch (stable: vX.Y.Z, prerelease: vX.Y.Z-rc.N, canary: vX.Y.Z-canary.N)." + Write-Warning "[tag_migration_warning] $tagMigrationWarning" $ExecutionReport.target_release = [ordered]@{ mode = $ModeName channel = [string]$modeConfig.channel prerelease = [bool]$modeConfig.prerelease tag = $targetTag + tag_family = 'legacy_date_window' + migration_warning = $tagMigrationWarning range_min = [int]$modeConfig.range_min range_max = [int]$modeConfig.range_max } diff --git a/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 index 4d8d6a2..7e44c8b 100644 --- a/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 +++ b/tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1 @@ -24,6 +24,9 @@ Describe 'Canary smoke tag hygiene workflow contract' { $script:workflowContent | Should -Match 'schedule:' $script:workflowContent | Should -Match 'workflow_dispatch:' $script:workflowContent | Should -Match 'target_date_utc' + $script:workflowContent | Should -Match 'tag_family' + $script:workflowContent | Should -Match 'legacy_date_window' + $script:workflowContent | Should -Match 'semver' $script:workflowContent | Should -Match 'keep_latest_n' $script:workflowContent | Should -Match 'apply_changes' $script:workflowContent | Should -Match 'type:\s*boolean' @@ -39,7 +42,13 @@ Describe 'Canary smoke tag hygiene workflow contract' { $script:scriptContent | Should -Match 'Get-GhReleasesPortable' $script:scriptContent | Should -Match 'release''\s*,\s*''delete''' $script:scriptContent | Should -Match '--cleanup-tag' + $script:scriptContent | Should -Match "ValidateSet\('auto', 'legacy_date_window', 'semver'\)" $script:scriptContent | Should -Match 'KeepLatestN' + $script:scriptContent | Should -Match 'SemverCanaryTagRegex' + $script:scriptContent | Should -Match 'tag_family_mode' + $script:scriptContent | Should -Match 'legacy_candidate_count' + $script:scriptContent | Should -Match 
'semver_candidate_count' + $script:scriptContent | Should -Match 'migration_warnings' $script:scriptContent | Should -Match '\(\?<date>\\d\{8\}\)' $script:scriptContent | Should -Match '\(\?<sequence>\\d\+\)' $script:scriptContent | Should -Match 'delete_count_exceeds_guard' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 1b4b88e..8735077 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -57,6 +57,8 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'promotion_source_asset_missing' $script:runtimeContent | Should -Match 'promotion_source_not_at_head' $script:runtimeContent | Should -Match 'release_tag_range_exhausted' + $script:runtimeContent | Should -Match '\[tag_migration_warning\]' + $script:runtimeContent | Should -Match "tag_family = 'legacy_date_window'" $script:runtimeContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' $script:runtimeContent | Should -Match '\$dispatchInputs = @\(' $script:runtimeContent | Should -Match '-Inputs \$dispatchInputs' diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 0bbc487..78a22ff 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -109,7 +109,10 @@ Describe 'Workspace installer release workflow contract' { } It 'enforces release notes, tag validation, and override disclosure support' { - $script:coreWorkflowContent | Should -Match '\^v\[0-9\]\+\\\.\[0-9\]\+\\\.\[0-9\]\+\$' + $script:coreWorkflowContent | Should -Match 'legacyTagPattern' + $script:coreWorkflowContent | Should -Match 'semverTagPattern' + $script:coreWorkflowContent | Should -Match '\[channel_tag_mismatch\]' + $script:coreWorkflowContent | Should -Match '\[tag_migration_warning\]' $script:coreWorkflowContent | Should -Match 'SHA256'
$script:coreWorkflowContent | Should -Match 'Release target commit' $script:coreWorkflowContent | Should -Match 'lvie-cdev-workspace-installer\.exe /S' @@ -120,6 +123,7 @@ Describe 'Workspace installer release workflow contract' { $script:coreWorkflowContent | Should -Match 'release_channel' $script:coreWorkflowContent | Should -Match 'release-manifest\.json' $script:coreWorkflowContent | Should -Match 'Override Disclosure' + $script:coreWorkflowContent | Should -Match 'Tag Policy Notice' $script:coreWorkflowContent | Should -Match 'OVERRIDE_APPLIED' } } From 6211ac578ca72e0710aa05127b7eb8c2a392bad0 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:40:06 -0800 Subject: [PATCH 32/60] Quote release tag descriptions to restore workflow dispatch parsing --- .github/workflows/_release-workspace-installer-core.yml | 2 +- .github/workflows/release-with-windows-gate.yml | 2 +- .github/workflows/release-workspace-installer.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_release-workspace-installer-core.yml b/.github/workflows/_release-workspace-installer-core.yml index 6fc4245..56e8abd 100644 --- a/.github/workflows/_release-workspace-installer-core.yml +++ b/.github/workflows/_release-workspace-installer-core.yml @@ -4,7 +4,7 @@ on: workflow_call: inputs: release_tag: - description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). + description: "Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N)." 
required: true type: string allow_existing_tag: diff --git a/.github/workflows/release-with-windows-gate.yml b/.github/workflows/release-with-windows-gate.yml index 9b13dd0..1c928b1 100644 --- a/.github/workflows/release-with-windows-gate.yml +++ b/.github/workflows/release-with-windows-gate.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: release_tag: - description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). + description: "Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N)." required: true type: string allow_existing_tag: diff --git a/.github/workflows/release-workspace-installer.yml b/.github/workflows/release-workspace-installer.yml index 984fbe0..2bd894d 100644 --- a/.github/workflows/release-workspace-installer.yml +++ b/.github/workflows/release-workspace-installer.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: release_tag: - description: Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N). + description: "Release tag to publish (SemVer preferred: vX.Y.Z / vX.Y.Z-rc.N / vX.Y.Z-canary.N; legacy migration: v0.YYYYMMDD.N)." 
required: true type: string allow_existing_tag: From b910d88b0c4dc6437787e7e8271a61a42393f2d7 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 20:57:06 -0800 Subject: [PATCH 33/60] feat(control-plane): cut over rollout orchestration to semver tags --- .github/workflows/release-control-plane.yml | 2 +- AGENTS.md | 12 +- README.md | 10 +- scripts/Invoke-ReleaseControlPlane.ps1 | 409 +++++++++++++++--- scripts/Invoke-ReleaseRollbackDrill.ps1 | 210 +++++++-- scripts/Invoke-RollbackDrillSelfHealing.ps1 | 255 +++++++++-- ...easeControlPlaneWorkflowContract.Tests.ps1 | 17 +- ...aseRollbackDrillWorkflowContract.Tests.ps1 | 7 + 8 files changed, 758 insertions(+), 164 deletions(-) diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 30b59fb..9cffbe3 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -27,7 +27,7 @@ on: default: true type: boolean keep_latest_canary_n: - description: Number of canary smoke tags to keep per date. + description: Number of latest SemVer canary tags to keep. required: false default: '1' type: string diff --git a/AGENTS.md b/AGENTS.md index 43c31db..25c59ce 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -219,11 +219,13 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `PromotePrerelease` - `PromoteStable` - `FullCycle` -- Channel tag windows are deterministic for `v0.YYYYMMDD.N`: - - canary: `1-49` - - prerelease: `50-79` - - stable: `80-99` -- Release-control-plane currently emits legacy date-window tags and must include deterministic migration warnings in execution reports. +- Release-control-plane dispatch tags must follow SemVer channel strategy: + - canary: `vX.Y.Z-canary.N` + - prerelease: `vX.Y.Z-rc.N` (promoted from semver canary) + - stable: `vX.Y.Z` (promoted from semver prerelease) +- Release-control-plane canary hygiene invocation must enforce `TagFamily=semver`. 
+- Legacy date-window tags (`v0.YYYYMMDD.N`) may still exist during migration but are non-canonical for control-plane dispatch. +- When legacy tags are observed, control-plane execution must emit deterministic migration warnings. - Promotion must gate on source release integrity (required assets + source commit equals branch head). - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. - `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. diff --git a/README.md b/README.md index a47bc42..24b5b48 100644 --- a/README.md +++ b/README.md @@ -388,9 +388,13 @@ Every run uploads `ops-monitoring-report.json`. Control-plane behavior: 1. Runs ops health gate and optional auto-remediation. -2. Dispatches release workflow with deterministic channel-specific legacy tag windows (`canary=1-49`, `prerelease=50-79`, `stable=80-99` for `v0.YYYYMMDD.N`) and emits deterministic migration warnings. -3. Verifies run completion. -4. Applies canary smoke tag hygiene after canary publish. +2. Dispatches release workflow with deterministic SemVer channel tags: + - canary: `vX.Y.Z-canary.N` + - prerelease: `vX.Y.Z-rc.N` (promoted from latest semver canary) + - stable: `vX.Y.Z` (promoted from latest semver prerelease on Monday window) +3. Verifies run completion and promotion source integrity (`assets + source commit == branch head`). +4. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. +5. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present. `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. 
diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 0a518fb..d23c507 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -66,8 +66,6 @@ function Get-ModeConfig { return [ordered]@{ channel = 'canary' prerelease = $true - range_min = 1 - range_max = 49 source_channel_for_promotion = '' enforce_prerelease_source = $false } @@ -76,8 +74,6 @@ function Get-ModeConfig { return [ordered]@{ channel = 'prerelease' prerelease = $true - range_min = 50 - range_max = 79 source_channel_for_promotion = 'canary' enforce_prerelease_source = $true } @@ -86,8 +82,6 @@ function Get-ModeConfig { return [ordered]@{ channel = 'stable' prerelease = $false - range_min = 80 - range_max = 99 source_channel_for_promotion = 'prerelease' enforce_prerelease_source = $true } @@ -98,69 +92,320 @@ function Get-ModeConfig { } } -function Parse-ReleaseTag { - param([Parameter(Mandatory = $true)][string]$TagName) +function Get-ReleasePublishedSortValue { + param([Parameter(Mandatory = $true)][object]$Record) - $match = [regex]::Match($TagName, '^v0\.(?<date>\d{8})\.(?<sequence>\d+)$') - if (-not $match.Success) { - return $null - } + $parsed = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$Record.published_at_utc, [ref]$parsed) + return $parsed +} - $sequence = 0 - if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { - return $null - } +function New-CoreVersion { + param( + [Parameter(Mandatory = $true)][int]$Major, + [Parameter(Mandatory = $true)][int]$Minor, + [Parameter(Mandatory = $true)][int]$Patch + ) return [ordered]@{ - tag_name = $TagName - date = [string]$match.Groups['date'].Value - sequence = $sequence + major = $Major + minor = $Minor + patch = $Patch } } -function Get-ReleaseRecordsForDate { +function Format-CoreVersion { + param([Parameter(Mandatory = $true)]$Core) + return "{0}.{1}.{2}" -f [int]$Core.major, [int]$Core.minor, [int]$Core.patch +} +
+function Compare-CoreVersion { param( - [Parameter(Mandatory = $true)][object[]]$ReleaseList, - [Parameter(Mandatory = $true)][string]$DateKey + [Parameter(Mandatory = $true)]$Left, + [Parameter(Mandatory = $true)]$Right ) - $records = @() - foreach ($release in $ReleaseList) { - $parsed = Parse-ReleaseTag -TagName ([string]$release.tagName) - if ($null -eq $parsed) { + foreach ($part in @('major', 'minor', 'patch')) { + $l = [int]$Left.$part + $r = [int]$Right.$part + if ($l -gt $r) { return 1 } + if ($l -lt $r) { return -1 } + } + + return 0 +} + +function Get-MaxCoreVersion { + param([Parameter(Mandatory = $true)][object[]]$Records) + + $maxCore = $null + foreach ($record in @($Records)) { + $candidate = New-CoreVersion -Major ([int]$record.major) -Minor ([int]$record.minor) -Patch ([int]$record.patch) + if ($null -eq $maxCore) { + $maxCore = $candidate continue } - if ([string]$parsed.date -ne $DateKey) { - continue + + if ((Compare-CoreVersion -Left $candidate -Right $maxCore) -gt 0) { + $maxCore = $candidate } + } + + return $maxCore +} + +function Test-CoreEquals { + param( + [Parameter(Mandatory = $true)]$Left, + [Parameter(Mandatory = $true)]$Right + ) + + return ((Compare-CoreVersion -Left $Left -Right $Right) -eq 0) +} + +function Get-SequenceFromLabel { + param( + [Parameter(Mandatory = $true)][string]$Label, + [Parameter(Mandatory = $true)][string]$Token + ) - $records += [ordered]@{ - tag_name = [string]$parsed.tag_name - date = [string]$parsed.date - sequence = [int]$parsed.sequence - is_prerelease = [bool]$release.isPrerelease - published_at_utc = [string]$release.publishedAt + if ([string]::IsNullOrWhiteSpace($Label)) { + return 0 + } + + $pattern = "(?i)(?:^|[.-]){0}[.-](?\d+)(?:$|[.-])" -f [regex]::Escape($Token) + $match = [regex]::Match($Label, $pattern) + if (-not $match.Success) { + return 0 + } + + $value = 0 + if (-not [int]::TryParse([string]$match.Groups['n'].Value, [ref]$value)) { + return 0 + } + + return $value +} + +function 
Convert-ReleaseToRecord { + param([Parameter(Mandatory = $true)][object]$Release) + + $tagName = [string]$Release.tagName + if ([string]::IsNullOrWhiteSpace($tagName)) { + return $null + } + + $isPrerelease = [bool]$Release.isPrerelease + $publishedAt = [string]$Release.publishedAt + $url = [string]$Release.url + + $legacyMatch = [regex]::Match($tagName, '^v0\.(?\d{8})\.(?\d+)$') + if ($legacyMatch.Success) { + $legacySequence = 0 + if (-not [int]::TryParse([string]$legacyMatch.Groups['sequence'].Value, [ref]$legacySequence)) { + return $null } + + $legacyChannel = 'unknown' + if ($legacySequence -ge 1 -and $legacySequence -le 49 -and $isPrerelease) { + $legacyChannel = 'canary' + } elseif ($legacySequence -ge 50 -and $legacySequence -le 79 -and $isPrerelease) { + $legacyChannel = 'prerelease' + } elseif ($legacySequence -ge 80 -and $legacySequence -le 99 -and -not $isPrerelease) { + $legacyChannel = 'stable' + } + + return [ordered]@{ + tag_name = $tagName + tag_family = 'legacy_date_window' + channel = $legacyChannel + is_prerelease = $isPrerelease + published_at_utc = $publishedAt + url = $url + major = 0 + minor = 0 + patch = 0 + prerelease_label = '' + prerelease_sequence = 0 + legacy_date = [string]$legacyMatch.Groups['date'].Value + legacy_sequence = $legacySequence + } + } + + $semverMatch = [regex]::Match( + $tagName, + '^v(?0|[1-9]\d*)\.(?0|[1-9]\d*)\.(?0|[1-9]\d*)(?:-(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' + ) + if (-not $semverMatch.Success) { + return $null } - return @($records | Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true }) + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + $channel = 'stable' + $sequence = 0 + if (-not [string]::IsNullOrWhiteSpace($prereleaseLabel)) { + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channel = 'canary' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'canary' + } else { + $channel = 
'prerelease' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'rc' + } + } + + return [ordered]@{ + tag_name = $tagName + tag_family = 'semver' + channel = $channel + is_prerelease = $isPrerelease + published_at_utc = $publishedAt + url = $url + major = [int]$semverMatch.Groups['major'].Value + minor = [int]$semverMatch.Groups['minor'].Value + patch = [int]$semverMatch.Groups['patch'].Value + prerelease_label = $prereleaseLabel + prerelease_sequence = $sequence + legacy_date = '' + legacy_sequence = 0 + } } -function Get-LatestRecordInRange { +function Get-LatestSemVerRecordByChannel { param( [Parameter(Mandatory = $true)][object[]]$Records, - [Parameter(Mandatory = $true)][int]$RangeMin, - [Parameter(Mandatory = $true)][int]$RangeMax + [Parameter(Mandatory = $true)][string]$Channel ) return @( $Records | - Where-Object { [int]$_.sequence -ge $RangeMin -and [int]$_.sequence -le $RangeMax } | - Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true } | + Where-Object { [string]$_.tag_family -eq 'semver' -and [string]$_.channel -eq $Channel } | + Sort-Object ` + @{ Expression = { [int]$_.major }; Descending = $true }, ` + @{ Expression = { [int]$_.minor }; Descending = $true }, ` + @{ Expression = { [int]$_.patch }; Descending = $true }, ` + @{ Expression = { [int]$_.prerelease_sequence }; Descending = $true }, ` + @{ Expression = { Get-ReleasePublishedSortValue -Record $_ }; Descending = $true }, ` + @{ Expression = { [string]$_.tag_name }; Descending = $false } | Select-Object -First 1 ) } +function Get-MaxPrereleaseSequenceForCore { + param( + [Parameter(Mandatory = $true)][object[]]$Records, + [Parameter(Mandatory = $true)]$Core, + [Parameter(Mandatory = $true)][string]$Channel + ) + + $matched = @( + $Records | + Where-Object { + ([string]$_.tag_family -eq 'semver') -and + ([string]$_.channel -eq $Channel) -and + ([int]$_.major -eq [int]$Core.major) -and + ([int]$_.minor -eq [int]$Core.minor) -and + ([int]$_.patch -eq 
[int]$Core.patch) + } | + ForEach-Object { [int]$_.prerelease_sequence } + ) + if (@($matched).Count -eq 0) { + return 0 + } + + return [int]((@($matched) | Measure-Object -Maximum).Maximum) +} + +function Resolve-CanaryTargetSemVer { + param([Parameter(Mandatory = $true)][object[]]$Records) + + $semverRecords = @($Records | Where-Object { [string]$_.tag_family -eq 'semver' }) + $stableRecords = @($semverRecords | Where-Object { [string]$_.channel -eq 'stable' }) + $nonStableRecords = @($semverRecords | Where-Object { [string]$_.channel -ne 'stable' }) + + $latestStableCore = Get-MaxCoreVersion -Records $stableRecords + $latestNonStableCore = Get-MaxCoreVersion -Records $nonStableRecords + + $targetCore = $null + if ($null -ne $latestNonStableCore -and (($null -eq $latestStableCore) -or ((Compare-CoreVersion -Left $latestNonStableCore -Right $latestStableCore) -gt 0))) { + $targetCore = $latestNonStableCore + } elseif ($null -ne $latestStableCore) { + $targetCore = New-CoreVersion -Major ([int]$latestStableCore.major) -Minor ([int]$latestStableCore.minor) -Patch ([int]$latestStableCore.patch + 1) + } elseif ($null -ne $latestNonStableCore) { + $targetCore = $latestNonStableCore + } else { + $targetCore = New-CoreVersion -Major 0 -Minor 1 -Patch 0 + } + + $maxCanarySequence = Get-MaxPrereleaseSequenceForCore -Records $semverRecords -Core $targetCore -Channel 'canary' + $nextCanarySequence = $maxCanarySequence + 1 + if ($nextCanarySequence -gt 9999) { + throw "semver_prerelease_sequence_exhausted: channel=canary core=$(Format-CoreVersion -Core $targetCore) next_sequence=$nextCanarySequence" + } + + return [ordered]@{ + core = $targetCore + prerelease_sequence = $nextCanarySequence + tag = "v$(Format-CoreVersion -Core $targetCore)-canary.$nextCanarySequence" + } +} + +function Resolve-PromotedTargetSemVer { + param( + [Parameter(Mandatory = $true)][object[]]$Records, + [Parameter(Mandatory = $true)][string]$TargetChannel, + [Parameter(Mandatory = $true)]$SourceCore + 
) + + if ([string]$TargetChannel -eq 'prerelease') { + $maxRcSequence = Get-MaxPrereleaseSequenceForCore -Records $Records -Core $SourceCore -Channel 'prerelease' + $nextRcSequence = $maxRcSequence + 1 + if ($nextRcSequence -gt 9999) { + throw "semver_prerelease_sequence_exhausted: channel=prerelease core=$(Format-CoreVersion -Core $SourceCore) next_sequence=$nextRcSequence" + } + + return [ordered]@{ + core = $SourceCore + prerelease_sequence = $nextRcSequence + tag = "v$(Format-CoreVersion -Core $SourceCore)-rc.$nextRcSequence" + skipped = $false + reason_code = '' + } + } + + if ([string]$TargetChannel -eq 'stable') { + $stableExists = @( + $Records | + Where-Object { + ([string]$_.tag_family -eq 'semver') -and + ([string]$_.channel -eq 'stable') -and + ([int]$_.major -eq [int]$SourceCore.major) -and + ([int]$_.minor -eq [int]$SourceCore.minor) -and + ([int]$_.patch -eq [int]$SourceCore.patch) + } + ).Count -gt 0 + + if ($stableExists) { + return [ordered]@{ + core = $SourceCore + prerelease_sequence = 0 + tag = "v$(Format-CoreVersion -Core $SourceCore)" + skipped = $true + reason_code = 'stable_already_published' + } + } + + return [ordered]@{ + core = $SourceCore + prerelease_sequence = 0 + tag = "v$(Format-CoreVersion -Core $SourceCore)" + skipped = $false + reason_code = '' + } + } + + throw "unsupported_target_channel: $TargetChannel" +} + function Invoke-ReleaseMode { param( [Parameter(Mandatory = $true)][string]$ModeName, @@ -171,26 +416,30 @@ function Invoke-ReleaseMode { $modeConfig = Get-ModeConfig -ModeName $ModeName $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) - - $records = @(Get-ReleaseRecordsForDate -ReleaseList $releaseList -DateKey $DateKey) - $targetRangeRecords = @( - $records | - Where-Object { [int]$_.sequence -ge [int]$modeConfig.range_min -and [int]$_.sequence -le [int]$modeConfig.range_max } | - Sort-Object @{ Expression = { [int]$_.sequence }; Descending = $true } + $allRecords = @( + 
$releaseList | + ForEach-Object { Convert-ReleaseToRecord -Release $_ } | + Where-Object { $null -ne $_ } ) + $legacyRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'legacy_date_window' -and [string]$_.channel -ne 'unknown' }) + $semverRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'semver' }) + + $migrationWarnings = @() + if (@($legacyRecords).Count -gt 0) { + $migrationWarnings += "Legacy date-window release tags remain present in '$Repository'. Control-plane dispatch now targets SemVer channel tags." + } + $sourceRecord = $null + $sourceCore = $null if (-not [string]::IsNullOrWhiteSpace([string]$modeConfig.source_channel_for_promotion)) { - $sourceRange = switch ([string]$modeConfig.source_channel_for_promotion) { - 'canary' { [ordered]@{ min = 1; max = 49 } } - 'prerelease' { [ordered]@{ min = 50; max = 79 } } - default { throw "unsupported_source_channel: $([string]$modeConfig.source_channel_for_promotion)" } - } - $sourceRecord = @(Get-LatestRecordInRange -Records $records -RangeMin $sourceRange.min -RangeMax $sourceRange.max) - if (@($sourceRecord).Count -ne 1) { - throw "promotion_source_missing: channel=$([string]$modeConfig.source_channel_for_promotion) date=$DateKey" + $sourceCandidates = @(Get-LatestSemVerRecordByChannel -Records $allRecords -Channel ([string]$modeConfig.source_channel_for_promotion)) + if (@($sourceCandidates).Count -ne 1) { + throw "promotion_source_missing: channel=$([string]$modeConfig.source_channel_for_promotion) strategy=semver" } - $sourceTag = [string]$sourceRecord[0].tag_name + $sourceRecord = $sourceCandidates[0] + $sourceTag = [string]$sourceRecord.tag_name + $sourceCore = New-CoreVersion -Major ([int]$sourceRecord.major) -Minor ([int]$sourceRecord.minor) -Patch ([int]$sourceRecord.patch) $sourceRelease = Invoke-GhJson -Arguments @( 'release', 'view', $sourceTag, @@ -232,34 +481,50 @@ function Invoke-ReleaseMode { $ExecutionReport.source_release = [ordered]@{ channel = 
[string]$modeConfig.source_channel_for_promotion tag = $sourceTag + tag_family = 'semver' + core = Format-CoreVersion -Core $sourceCore + prerelease_sequence = [int]$sourceRecord.prerelease_sequence source_sha = $sourceCommit head_sha = $headSha url = [string]$sourceRelease.url } } - $nextSequence = if (@($targetRangeRecords).Count -eq 0) { - [int]$modeConfig.range_min + $targetPlan = $null + if ($ModeName -eq 'CanaryCycle') { + $targetPlan = Resolve-CanaryTargetSemVer -Records $allRecords + } elseif ($ModeName -eq 'PromotePrerelease' -or $ModeName -eq 'PromoteStable') { + if ($null -eq $sourceCore) { + throw "promotion_source_missing: channel=$([string]$modeConfig.source_channel_for_promotion) strategy=semver" + } + $targetPlan = Resolve-PromotedTargetSemVer -Records $allRecords -TargetChannel ([string]$modeConfig.channel) -SourceCore $sourceCore } else { - ([int]$targetRangeRecords[0].sequence) + 1 - } - - if ($nextSequence -gt [int]$modeConfig.range_max) { - throw "release_tag_range_exhausted: mode=$ModeName date=$DateKey next_sequence=$nextSequence range_max=$([int]$modeConfig.range_max)" + throw "unsupported_release_mode: $ModeName" } - $targetTag = "v0.$DateKey.$nextSequence" - $tagMigrationWarning = "Control-plane generated legacy date-window tag '$targetTag'. Prefer SemVer tags for manual dispatch (stable: vX.Y.Z, prerelease: vX.Y.Z-rc.N, canary: vX.Y.Z-canary.N)." 
- Write-Warning "[tag_migration_warning] $tagMigrationWarning" + $targetTag = [string]$targetPlan.tag + $targetCoreText = Format-CoreVersion -Core $targetPlan.core $ExecutionReport.target_release = [ordered]@{ mode = $ModeName channel = [string]$modeConfig.channel prerelease = [bool]$modeConfig.prerelease tag = $targetTag - tag_family = 'legacy_date_window' - migration_warning = $tagMigrationWarning - range_min = [int]$modeConfig.range_min - range_max = [int]$modeConfig.range_max + tag_family = 'semver' + core = $targetCoreText + prerelease_sequence = [int]$targetPlan.prerelease_sequence + status = if ([bool]$targetPlan.skipped) { 'skipped' } else { 'planned' } + reason_code = if ([bool]$targetPlan.skipped) { [string]$targetPlan.reason_code } else { '' } + migration_warnings = @($migrationWarnings) + } + + if (@($migrationWarnings).Count -gt 0) { + foreach ($warning in @($migrationWarnings)) { + Write-Warning "[tag_migration_warning] $warning" + } + } + + if ([bool]$targetPlan.skipped) { + return } if ($DryRun) { @@ -313,11 +578,12 @@ function Invoke-ReleaseMode { & pwsh -NoProfile -File $canaryHygieneScript ` -Repository $Repository ` -DateUtc $DateKey ` + -TagFamily semver ` -KeepLatestN $KeepLatestCanaryN ` -Delete ` -OutputPath $hygienePath if ($LASTEXITCODE -ne 0) { - throw "canary_hygiene_failed: date=$DateKey exit_code=$LASTEXITCODE" + throw "canary_hygiene_failed: tag_family=semver date=$DateKey exit_code=$LASTEXITCODE" } $ExecutionReport.hygiene = Get-Content -LiteralPath $hygienePath -Raw | ConvertFrom-Json -ErrorAction Stop } @@ -336,6 +602,8 @@ $report = [ordered]@{ auto_remediate = [bool]$AutoRemediate sync_guard_max_age_hours = $SyncGuardMaxAgeHours keep_latest_canary_n = $KeepLatestCanaryN + tag_strategy = 'semver' + migration_mode = 'dual_mode_publish_semver_control_plane' status = 'fail' reason_code = '' message = '' @@ -414,6 +682,7 @@ try { mode = 'PromoteStable' status = 'skipped' reason_code = 'stable_window_closed' + tag_family = 'semver' } } 
$dayOfWeekUtc = (Get-Date).ToUniversalTime().DayOfWeek.ToString() diff --git a/scripts/Invoke-ReleaseRollbackDrill.ps1 b/scripts/Invoke-ReleaseRollbackDrill.ps1 index f7b8a76..3543dcb 100644 --- a/scripts/Invoke-ReleaseRollbackDrill.ps1 +++ b/scripts/Invoke-ReleaseRollbackDrill.ps1 @@ -37,44 +37,104 @@ function Add-ReasonCode { } } -function Parse-ReleaseTagRecord { - param([Parameter(Mandatory = $true)][string]$TagName) +function Get-ReleasePublishedSortValue { + param([Parameter(Mandatory = $true)][object]$Candidate) + + $parsed = [DateTimeOffset]::MinValue + [void][DateTimeOffset]::TryParse([string]$Candidate.published_at_utc, [ref]$parsed) + return $parsed +} + +function Get-SequenceFromLabel { + param( + [Parameter(Mandatory = $true)][string]$Label, + [Parameter(Mandatory = $true)][string]$Token + ) - $match = [regex]::Match($TagName, '^v0\.(?\d{8})\.(?\d+)$') + $pattern = "(?i)(?:^|[.-]){0}[.-](?\d+)(?:$|[.-])" -f [regex]::Escape($Token) + $match = [regex]::Match($Label, $pattern) if (-not $match.Success) { - return $null + return 0 } - $sequence = 0 - if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { - return $null + $value = 0 + if (-not [int]::TryParse([string]$match.Groups['n'].Value, [ref]$value)) { + return 0 } - return [ordered]@{ - tag_name = $TagName - date = [string]$match.Groups['date'].Value - sequence = $sequence - } + return $value } -function Test-ChannelMatch { +function Parse-ReleaseTagRecord { param( - [Parameter(Mandatory = $true)][object]$ReleaseRecord, - [Parameter(Mandatory = $true)][string]$TargetChannel + [Parameter(Mandatory = $true)][string]$TagName, + [Parameter(Mandatory = $true)][bool]$IsPrerelease + ) + + $legacyMatch = [regex]::Match($TagName, '^v0\.(?\d{8})\.(?\d+)$') + if ($legacyMatch.Success) { + $legacySequence = 0 + if (-not [int]::TryParse([string]$legacyMatch.Groups['sequence'].Value, [ref]$legacySequence)) { + return $null + } + + $legacyChannel = 'unknown' + if ($legacySequence -ge 1 
-and $legacySequence -le 49 -and $IsPrerelease) { + $legacyChannel = 'canary' + } elseif ($legacySequence -ge 50 -and $legacySequence -le 79 -and $IsPrerelease) { + $legacyChannel = 'prerelease' + } elseif ($legacySequence -ge 80 -and $legacySequence -le 99 -and -not $IsPrerelease) { + $legacyChannel = 'stable' + } + + return [ordered]@{ + tag_name = $TagName + tag_family = 'legacy_date_window' + channel = $legacyChannel + major = 0 + minor = 0 + patch = 0 + prerelease_label = '' + prerelease_sequence = 0 + legacy_date = [string]$legacyMatch.Groups['date'].Value + legacy_sequence = $legacySequence + is_prerelease = $IsPrerelease + } + } + + $semverMatch = [regex]::Match( + $TagName, + '^v(?0|[1-9]\d*)\.(?0|[1-9]\d*)\.(?0|[1-9]\d*)(?:-(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' ) + if (-not $semverMatch.Success) { + return $null + } - $parsed = Parse-ReleaseTagRecord -TagName ([string]$ReleaseRecord.tagName) - if ($null -eq $parsed) { - return $false + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + $channel = 'stable' + $sequence = 0 + if (-not [string]::IsNullOrWhiteSpace($prereleaseLabel)) { + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channel = 'canary' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'canary' + } else { + $channel = 'prerelease' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'rc' + } } - $seq = [int]$parsed.sequence - $isPrerelease = [bool]$ReleaseRecord.isPrerelease - switch ($TargetChannel) { - 'canary' { return $isPrerelease -and $seq -ge 1 -and $seq -le 49 } - 'prerelease' { return $isPrerelease -and $seq -ge 50 -and $seq -le 79 } - 'stable' { return (-not $isPrerelease) -and $seq -ge 80 -and $seq -le 99 } - default { return $false } + return [ordered]@{ + tag_name = $TagName + tag_family = 'semver' + channel = $channel + major = [int]$semverMatch.Groups['major'].Value + minor = [int]$semverMatch.Groups['minor'].Value + 
patch = [int]$semverMatch.Groups['patch'].Value + prerelease_label = $prereleaseLabel + prerelease_sequence = $sequence + legacy_date = '' + legacy_sequence = 0 + is_prerelease = $IsPrerelease } } @@ -93,10 +153,15 @@ $report = [ordered]@{ repository = $Repository channel = $Channel required_history_count = $RequiredHistoryCount + tag_strategy = 'semver_preferred_dual_mode' status = 'fail' reason_codes = @() message = '' candidate_count = 0 + semver_candidate_count = 0 + legacy_candidate_count = 0 + candidate_tag_family_selected = '' + migration_warnings = @() current = $null previous = $null required_assets = $requiredAssets @@ -105,36 +170,97 @@ $report = [ordered]@{ $reasonCodes = [System.Collections.Generic.List[string]]::new() $assetChecks = [System.Collections.Generic.List[object]]::new() +$migrationWarnings = [System.Collections.Generic.List[string]]::new() try { $releases = @(Get-GhReleasesPortable -Repository $Repository -Limit $ReleaseLimit -ExcludeDrafts) - $candidates = @( - $releases | - Where-Object { Test-ChannelMatch -ReleaseRecord $_ -TargetChannel $Channel } | - Sort-Object { - $parsed = Parse-ReleaseTagRecord -TagName ([string]$_.tagName) - "{0}-{1:D3}" -f [string]$parsed.date, [int]$parsed.sequence - } -Descending - ) + $channelCandidates = @() + foreach ($release in @($releases)) { + $parsed = Parse-ReleaseTagRecord -TagName ([string]$release.tagName) -IsPrerelease ([bool]$release.isPrerelease) + if ($null -eq $parsed) { + continue + } + if ([string]$parsed.channel -ne $Channel) { + continue + } + + $channelCandidates += [ordered]@{ + tag_name = [string]$release.tagName + tag_family = [string]$parsed.tag_family + channel = [string]$parsed.channel + is_prerelease = [bool]$release.isPrerelease + published_at_utc = [string]$release.publishedAt + url = [string]$release.url + major = [int]$parsed.major + minor = [int]$parsed.minor + patch = [int]$parsed.patch + prerelease_sequence = [int]$parsed.prerelease_sequence + legacy_date = 
[string]$parsed.legacy_date + legacy_sequence = [int]$parsed.legacy_sequence + } + } + + $semverCandidates = @($channelCandidates | Where-Object { [string]$_.tag_family -eq 'semver' }) + $legacyCandidates = @($channelCandidates | Where-Object { [string]$_.tag_family -eq 'legacy_date_window' }) + $report.semver_candidate_count = @($semverCandidates).Count + $report.legacy_candidate_count = @($legacyCandidates).Count + + if (@($legacyCandidates).Count -gt 0) { + [void]$migrationWarnings.Add("Legacy date-window rollback candidates were detected for channel '$Channel'.") + } + + $selectedFamily = '' + $selectedCandidates = @() + if (@($semverCandidates).Count -gt 0) { + $selectedFamily = 'semver' + $selectedCandidates = @( + $semverCandidates | + Sort-Object ` + @{ Expression = { [int]$_.major }; Descending = $true }, ` + @{ Expression = { [int]$_.minor }; Descending = $true }, ` + @{ Expression = { [int]$_.patch }; Descending = $true }, ` + @{ Expression = { [int]$_.prerelease_sequence }; Descending = $true }, ` + @{ Expression = { Get-ReleasePublishedSortValue -Candidate $_ }; Descending = $true }, ` + @{ Expression = { [string]$_.tag_name }; Descending = $false } + ) + if (@($legacyCandidates).Count -gt 0) { + [void]$migrationWarnings.Add("SemVer candidates were selected for rollback drill; legacy candidates were ignored for precedence.") + } + } else { + $selectedFamily = 'legacy_date_window' + $selectedCandidates = @( + $legacyCandidates | + Sort-Object ` + @{ Expression = { [string]$_.legacy_date }; Descending = $true }, ` + @{ Expression = { [int]$_.legacy_sequence }; Descending = $true }, ` + @{ Expression = { Get-ReleasePublishedSortValue -Candidate $_ }; Descending = $true }, ` + @{ Expression = { [string]$_.tag_name }; Descending = $false } + ) + } + + $report.candidate_tag_family_selected = $selectedFamily + $report.migration_warnings = @($migrationWarnings) + $report.candidate_count = @($selectedCandidates).Count - $report.candidate_count = 
@($candidates).Count - if (@($candidates).Count -lt $RequiredHistoryCount) { + if (@($selectedCandidates).Count -lt $RequiredHistoryCount) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'rollback_candidate_missing' } else { - $current = $candidates[0] - $previous = $candidates[1] + $current = $selectedCandidates[0] + $previous = $selectedCandidates[1] $report.current = [ordered]@{ - tag = [string]$current.tagName - published_at_utc = [string]$current.publishedAt + tag = [string]$current.tag_name + tag_family = [string]$current.tag_family + published_at_utc = [string]$current.published_at_utc url = [string]$current.url } $report.previous = [ordered]@{ - tag = [string]$previous.tagName - published_at_utc = [string]$previous.publishedAt + tag = [string]$previous.tag_name + tag_family = [string]$previous.tag_family + published_at_utc = [string]$previous.published_at_utc url = [string]$previous.url } - foreach ($tag in @([string]$current.tagName, [string]$previous.tagName)) { + foreach ($tag in @([string]$current.tag_name, [string]$previous.tag_name)) { $release = Invoke-GhJson -Arguments @( 'release', 'view', $tag, diff --git a/scripts/Invoke-RollbackDrillSelfHealing.ps1 b/scripts/Invoke-RollbackDrillSelfHealing.ps1 index 9308377..5099441 100644 --- a/scripts/Invoke-RollbackDrillSelfHealing.ps1 +++ b/scripts/Invoke-RollbackDrillSelfHealing.ps1 @@ -44,6 +44,10 @@ param( [ValidateRange(1, 99)] [int]$CanarySequenceMax = 49, + [Parameter()] + [ValidateSet('semver', 'legacy_date_window')] + [string]$CanaryTagFamily = 'semver', + [Parameter()] [string]$OutputPath = '' ) @@ -64,23 +68,148 @@ foreach ($requiredScript in @($rollbackDrillScript, $dispatchWorkflowScript, $wa } function Parse-ReleaseTagRecord { - param([Parameter(Mandatory = $true)][string]$TagName) + param( + [Parameter(Mandatory = $true)][string]$TagName, + [Parameter(Mandatory = $true)][bool]$IsPrerelease + ) - $match = [regex]::Match($TagName, '^v0\.(?\d{8})\.(?\d+)$') - if (-not $match.Success) { + 
$legacyMatch = [regex]::Match($TagName, '^v0\.(?\d{8})\.(?\d+)$') + if ($legacyMatch.Success) { + $legacySequence = 0 + if (-not [int]::TryParse([string]$legacyMatch.Groups['sequence'].Value, [ref]$legacySequence)) { + return $null + } + + $legacyChannel = 'unknown' + if ($legacySequence -ge 1 -and $legacySequence -le 49 -and $IsPrerelease) { + $legacyChannel = 'canary' + } elseif ($legacySequence -ge 50 -and $legacySequence -le 79 -and $IsPrerelease) { + $legacyChannel = 'prerelease' + } elseif ($legacySequence -ge 80 -and $legacySequence -le 99 -and -not $IsPrerelease) { + $legacyChannel = 'stable' + } + + return [pscustomobject]@{ + tag_name = $TagName + tag_family = 'legacy_date_window' + channel = $legacyChannel + major = 0 + minor = 0 + patch = 0 + prerelease_sequence = 0 + date = [string]$legacyMatch.Groups['date'].Value + sequence = $legacySequence + is_prerelease = $IsPrerelease + } + } + + $semverMatch = [regex]::Match( + $TagName, + '^v(?0|[1-9]\d*)\.(?0|[1-9]\d*)\.(?0|[1-9]\d*)(?:-(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' + ) + if (-not $semverMatch.Success) { return $null } + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + $channel = 'stable' $sequence = 0 - if (-not [int]::TryParse([string]$match.Groups['sequence'].Value, [ref]$sequence)) { - return $null + if (-not [string]::IsNullOrWhiteSpace($prereleaseLabel)) { + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channel = 'canary' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'canary' + } else { + $channel = 'prerelease' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'rc' + } } return [pscustomobject]@{ tag_name = $TagName - date = [string]$match.Groups['date'].Value - sequence = $sequence + tag_family = 'semver' + channel = $channel + major = [int]$semverMatch.Groups['major'].Value + minor = [int]$semverMatch.Groups['minor'].Value + patch = [int]$semverMatch.Groups['patch'].Value 
+ prerelease_sequence = $sequence + date = '' + sequence = 0 + is_prerelease = $IsPrerelease + } +} + +function Get-SequenceFromLabel { + param( + [Parameter(Mandatory = $true)][string]$Label, + [Parameter(Mandatory = $true)][string]$Token + ) + + $pattern = "(?i)(?:^|[.-]){0}[.-](?\d+)(?:$|[.-])" -f [regex]::Escape($Token) + $match = [regex]::Match($Label, $pattern) + if (-not $match.Success) { + return 0 + } + + $value = 0 + if (-not [int]::TryParse([string]$match.Groups['n'].Value, [ref]$value)) { + return 0 + } + + return $value +} + +function New-CoreVersion { + param( + [Parameter(Mandatory = $true)][int]$Major, + [Parameter(Mandatory = $true)][int]$Minor, + [Parameter(Mandatory = $true)][int]$Patch + ) + + return [pscustomobject]@{ + major = $Major + minor = $Minor + patch = $Patch + } +} + +function Format-CoreVersion { + param([Parameter(Mandatory = $true)]$Core) + return "{0}.{1}.{2}" -f [int]$Core.major, [int]$Core.minor, [int]$Core.patch +} + +function Compare-CoreVersion { + param( + [Parameter(Mandatory = $true)]$Left, + [Parameter(Mandatory = $true)]$Right + ) + + foreach ($part in @('major', 'minor', 'patch')) { + $l = [int]$Left.$part + $r = [int]$Right.$part + if ($l -gt $r) { return 1 } + if ($l -lt $r) { return -1 } + } + + return 0 +} + +function Get-MaxCoreVersion { + param([Parameter(Mandatory = $true)][object[]]$Records) + + $maxCore = $null + foreach ($record in @($Records)) { + $candidate = New-CoreVersion -Major ([int]$record.major) -Minor ([int]$record.minor) -Patch ([int]$record.patch) + if ($null -eq $maxCore) { + $maxCore = $candidate + continue + } + + if ((Compare-CoreVersion -Left $candidate -Right $maxCore) -gt 0) { + $maxCore = $candidate + } } + + return $maxCore } function Get-NextCanaryTag { @@ -88,49 +217,105 @@ function Get-NextCanaryTag { [Parameter(Mandatory = $true)][string]$TargetRepository, [Parameter(Mandatory = $true)][int]$MaxReleases, [Parameter(Mandatory = $true)][int]$RangeMin, - [Parameter(Mandatory = 
$true)][int]$RangeMax + [Parameter(Mandatory = $true)][int]$RangeMax, + [Parameter(Mandatory = $true)][string]$TagFamily ) - if ($RangeMin -gt $RangeMax) { - throw "canary_range_invalid: min=$RangeMin max=$RangeMax" - } - - $dateKey = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') $releases = @(Get-GhReleasesPortable -Repository $TargetRepository -Limit $MaxReleases -ExcludeDrafts) - $matched = @() - foreach ($release in $releases) { - if (-not [bool]$release.isPrerelease) { - continue + + if ([string]$TagFamily -eq 'legacy_date_window') { + if ($RangeMin -gt $RangeMax) { + throw "canary_range_invalid: min=$RangeMin max=$RangeMax" } - $parsed = Parse-ReleaseTagRecord -TagName ([string]$release.tagName) - if ($null -eq $parsed) { - continue + $dateKey = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') + $matched = @() + foreach ($release in $releases) { + if (-not [bool]$release.isPrerelease) { + continue + } + + $parsed = Parse-ReleaseTagRecord -TagName ([string]$release.tagName) -IsPrerelease ([bool]$release.isPrerelease) + if ($null -eq $parsed -or [string]$parsed.tag_family -ne 'legacy_date_window') { + continue + } + if ([string]$parsed.date -ne $dateKey) { + continue + } + if ([int]$parsed.sequence -lt $RangeMin -or [int]$parsed.sequence -gt $RangeMax) { + continue + } + + $matched += [int]$parsed.sequence } - if ([string]$parsed.date -ne $dateKey) { - continue + + $nextSequence = if (@($matched).Count -eq 0) { + $RangeMin + } else { + ((@($matched) | Measure-Object -Maximum).Maximum + 1) } - if ([int]$parsed.sequence -lt $RangeMin -or [int]$parsed.sequence -gt $RangeMax) { - continue + + if ($nextSequence -gt $RangeMax) { + throw "canary_tag_range_exhausted: date=$dateKey next_sequence=$nextSequence range_max=$RangeMax" } - $matched += [int]$parsed.sequence + return [pscustomobject]@{ + tag_family = 'legacy_date_window' + date_key = $dateKey + core = '' + sequence = $nextSequence + tag = "v0.$dateKey.$nextSequence" + } } - $nextSequence = if 
(@($matched).Count -eq 0) { - $RangeMin + $allRecords = @( + $releases | + ForEach-Object { Parse-ReleaseTagRecord -TagName ([string]$_.tagName) -IsPrerelease ([bool]$_.isPrerelease) } | + Where-Object { $null -ne $_ } + ) + $semverRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'semver' }) + $stableSemver = @($semverRecords | Where-Object { [string]$_.channel -eq 'stable' }) + $nonStableSemver = @($semverRecords | Where-Object { [string]$_.channel -eq 'canary' -or [string]$_.channel -eq 'prerelease' }) + + $latestStableCore = Get-MaxCoreVersion -Records $stableSemver + $latestNonStableCore = Get-MaxCoreVersion -Records $nonStableSemver + + $targetCore = $null + if ($null -ne $latestNonStableCore -and (($null -eq $latestStableCore) -or ((Compare-CoreVersion -Left $latestNonStableCore -Right $latestStableCore) -gt 0))) { + $targetCore = $latestNonStableCore + } elseif ($null -ne $latestStableCore) { + $targetCore = New-CoreVersion -Major ([int]$latestStableCore.major) -Minor ([int]$latestStableCore.minor) -Patch ([int]$latestStableCore.patch + 1) + } elseif ($null -ne $latestNonStableCore) { + $targetCore = $latestNonStableCore } else { - ((@($matched) | Measure-Object -Maximum).Maximum + 1) + $targetCore = New-CoreVersion -Major 0 -Minor 1 -Patch 0 } - if ($nextSequence -gt $RangeMax) { - throw "canary_tag_range_exhausted: date=$dateKey next_sequence=$nextSequence range_max=$RangeMax" + $matchedSemverCanary = @( + $semverRecords | + Where-Object { + ([string]$_.channel -eq 'canary') -and + ([int]$_.major -eq [int]$targetCore.major) -and + ([int]$_.minor -eq [int]$targetCore.minor) -and + ([int]$_.patch -eq [int]$targetCore.patch) + } | + ForEach-Object { [int]$_.prerelease_sequence } + ) + $nextCanarySequence = if (@($matchedSemverCanary).Count -eq 0) { + 1 + } else { + ((@($matchedSemverCanary) | Measure-Object -Maximum).Maximum + 1) + } + if ($nextCanarySequence -gt 9999) { + throw "semver_prerelease_sequence_exhausted: channel=canary 
core=$(Format-CoreVersion -Core $targetCore) next_sequence=$nextCanarySequence" } return [pscustomobject]@{ - date_key = $dateKey - sequence = $nextSequence - tag = "v0.$dateKey.$nextSequence" + tag_family = 'semver' + date_key = '' + core = (Format-CoreVersion -Core $targetCore) + sequence = $nextCanarySequence + tag = "v$(Format-CoreVersion -Core $targetCore)-canary.$nextCanarySequence" } } @@ -205,6 +390,7 @@ $report = [ordered]@{ watch_timeout_minutes = $WatchTimeoutMinutes canary_sequence_min = $CanarySequenceMin canary_sequence_max = $CanarySequenceMax + canary_tag_family = $CanaryTagFamily status = 'fail' reason_code = '' message = '' @@ -272,7 +458,8 @@ try { -TargetRepository $Repository ` -MaxReleases $ReleaseLimit ` -RangeMin $CanarySequenceMin ` - -RangeMax $CanarySequenceMax + -RangeMax $CanarySequenceMax ` + -TagFamily $CanaryTagFamily $attemptRecord.target_tag = [string]$targetTagRecord.tag $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-dispatch.json" -f $attempt) diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 8735077..0b6c9ff 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -45,20 +45,19 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'contents:\s*write' } - It 'implements mode sequencing, promotion guards, and deterministic tag ranges' { + It 'implements mode sequencing, semver promotion guards, and semver tag planning' { $script:runtimeContent | Should -Match "ValidateSet\('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle'\)" - $script:runtimeContent | Should -Match 'range_min = 1' - $script:runtimeContent | Should -Match 'range_max = 49' - $script:runtimeContent | Should -Match 'range_min = 50' - $script:runtimeContent | Should -Match 'range_max = 79' - $script:runtimeContent | Should -Match 'range_min = 80' - 
$script:runtimeContent | Should -Match 'range_max = 99' + $script:runtimeContent | Should -Match 'Resolve-CanaryTargetSemVer' + $script:runtimeContent | Should -Match 'Resolve-PromotedTargetSemVer' + $script:runtimeContent | Should -Match 'tag_strategy = ''semver''' + $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' $script:runtimeContent | Should -Match 'promotion_source_not_at_head' - $script:runtimeContent | Should -Match 'release_tag_range_exhausted' + $script:runtimeContent | Should -Match 'stable_already_published' $script:runtimeContent | Should -Match '\[tag_migration_warning\]' - $script:runtimeContent | Should -Match "tag_family = 'legacy_date_window'" + $script:runtimeContent | Should -Match "tag_family = 'semver'" + $script:runtimeContent | Should -Match '-TagFamily semver' $script:runtimeContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' $script:runtimeContent | Should -Match '\$dispatchInputs = @\(' $script:runtimeContent | Should -Match '-Inputs \$dispatchInputs' diff --git a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 index 2b753d4..052769a 100644 --- a/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1 @@ -44,6 +44,9 @@ Describe 'Release rollback drill workflow contract' { It 'validates channel-specific release history and required rollback assets' { $script:runtimeContent | Should -Match "ValidateSet\('stable', 'prerelease', 'canary'\)" $script:runtimeContent | Should -Match 'AllowEmptyCollection' + $script:runtimeContent | Should -Match 'semver_preferred_dual_mode' + $script:runtimeContent | Should -Match 'candidate_tag_family_selected' + $script:runtimeContent | Should -Match 'SemVer candidates were selected for rollback drill' $script:runtimeContent 
| Should -Match 'rollback_candidate_missing' $script:runtimeContent | Should -Match 'rollback_assets_missing' $script:runtimeContent | Should -Match 'lvie-cdev-workspace-installer\.exe' @@ -61,6 +64,10 @@ Describe 'Release rollback drill workflow contract' { $script:selfHealingContent | Should -Match '-Inputs \$dispatchInputs' $script:selfHealingContent | Should -Match 'release-workspace-installer\.yml' $script:selfHealingContent | Should -Match 'release_channel=canary' + $script:selfHealingContent | Should -Match "ValidateSet\('semver', 'legacy_date_window'\)" + $script:selfHealingContent | Should -Match 'canary_tag_family' + $script:selfHealingContent | Should -Match 'v\$\(' + $script:selfHealingContent | Should -Match '-canary\.\$nextCanarySequence' $script:selfHealingContent | Should -Match 'allow_existing_tag=false' $script:selfHealingContent | Should -Match 'rollback_candidate_missing' $script:selfHealingContent | Should -Match 'rollback_assets_missing' From 463d1916e3d68451596365f2ac54fcc56da836ea Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 21:09:13 -0800 Subject: [PATCH 34/60] fix(control-plane): bootstrap semver canary planning and add semver gate policy --- AGENTS.md | 2 + README.md | 3 +- scripts/Invoke-ReleaseControlPlane.ps1 | 101 ++++++++++++++++-- scripts/Invoke-RollbackDrillSelfHealing.ps1 | 2 +- scripts/Test-PolicyContracts.ps1 | 4 + scripts/Test-ReleaseClientContracts.ps1 | 4 + tests/ReleaseClientPolicyContract.Tests.ps1 | 5 + ...easeControlPlaneWorkflowContract.Tests.ps1 | 4 + tests/WorkspaceSurfaceContract.Tests.ps1 | 4 + .../workspace-governance.json | 5 + workspace-governance.json | 5 + 11 files changed, 131 insertions(+), 8 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 25c59ce..9edc52a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -225,7 +225,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - stable: `vX.Y.Z` (promoted from semver prerelease) - Release-control-plane canary 
hygiene invocation must enforce `TagFamily=semver`. - Legacy date-window tags (`v0.YYYYMMDD.N`) may still exist during migration but are non-canonical for control-plane dispatch. +- Control-plane tag strategy policy must define `ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`) and keep it aligned with signature grace-end during dual-mode transition. - When legacy tags are observed, control-plane execution must emit deterministic migration warnings. +- After `semver_only_enforce_utc`, legacy tag presence must fail control-plane execution with `semver_only_enforcement_violation`. - Promotion must gate on source release integrity (required assets + source commit equals branch head). - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. - `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. diff --git a/README.md b/README.md index 24b5b48..a9598ef 100644 --- a/README.md +++ b/README.md @@ -394,7 +394,8 @@ Control-plane behavior: - stable: `vX.Y.Z` (promoted from latest semver prerelease on Monday window) 3. Verifies run completion and promotion source integrity (`assets + source commit == branch head`). 4. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. -5. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present. +5. Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). +6. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. 
diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index d23c507..e2fbf72 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -58,6 +58,87 @@ foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatch } } +function Resolve-SemVerEnforcementPolicy { + param( + [Parameter(Mandatory = $true)][string]$ManifestPath, + [Parameter(Mandatory = $true)][DateTimeOffset]$FallbackEnforceUtc + ) + + $warnings = [System.Collections.Generic.List[string]]::new() + $policy = [ordered]@{ + semver_only_enforce_utc = $FallbackEnforceUtc + source = 'default' + warnings = @() + } + + if (-not (Test-Path -LiteralPath $ManifestPath -PathType Leaf)) { + [void]$warnings.Add("workspace_governance_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + try { + $manifest = Get-Content -LiteralPath $ManifestPath -Raw | ConvertFrom-Json -Depth 100 + $candidateValue = $manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc + if ($null -eq $candidateValue) { + [void]$warnings.Add("semver_only_enforce_utc_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + if ($candidateValue -is [DateTimeOffset]) { + $policy.semver_only_enforce_utc = ([DateTimeOffset]$candidateValue).ToUniversalTime() + $policy.source = 'workspace_governance' + $policy.warnings = @($warnings) + return $policy + } + + if ($candidateValue -is [DateTime]) { + $candidateDate = [DateTime]$candidateValue + if ($candidateDate.Kind -eq [DateTimeKind]::Unspecified) { + $candidateDate = [DateTime]::SpecifyKind($candidateDate, [DateTimeKind]::Utc) + } + $policy.semver_only_enforce_utc = ([DateTimeOffset]$candidateDate).ToUniversalTime() + $policy.source = 'workspace_governance' + $policy.warnings = @($warnings) + return $policy + } + + $candidate = [string]$candidateValue + if 
([string]::IsNullOrWhiteSpace($candidate)) { + [void]$warnings.Add("semver_only_enforce_utc_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + $parsed = [DateTimeOffset]::MinValue + $parseStyles = [Globalization.DateTimeStyles]::AssumeUniversal -bor [Globalization.DateTimeStyles]::AdjustToUniversal + if (-not [DateTimeOffset]::TryParse($candidate, [Globalization.CultureInfo]::InvariantCulture, $parseStyles, [ref]$parsed)) { + [void]$warnings.Add("semver_only_enforce_utc_invalid: value=$candidate") + $policy.warnings = @($warnings) + return $policy + } + + $policy.semver_only_enforce_utc = $parsed + $policy.source = 'workspace_governance' + } catch { + [void]$warnings.Add("semver_policy_load_failed: $([string]$_.Exception.Message)") + } + + $policy.warnings = @($warnings) + return $policy +} + +$defaultSemverOnlyEnforceUtc = [DateTimeOffset]::Parse('2026-07-01T00:00:00Z') +$workspaceGovernancePath = Join-Path (Split-Path -Parent $PSScriptRoot) 'workspace-governance.json' +$semverPolicy = Resolve-SemVerEnforcementPolicy -ManifestPath $workspaceGovernancePath -FallbackEnforceUtc $defaultSemverOnlyEnforceUtc +$script:semverOnlyEnforceUtc = [DateTimeOffset]$semverPolicy.semver_only_enforce_utc +$script:semverPolicySource = [string]$semverPolicy.source +$script:semverOnlyEnforced = ([DateTimeOffset]::UtcNow -ge $script:semverOnlyEnforceUtc) +foreach ($warning in @($semverPolicy.warnings)) { + Write-Warning "[semver_policy_warning] $warning" +} + function Get-ModeConfig { param([Parameter(Mandatory = $true)][string]$ModeName) @@ -136,7 +217,7 @@ function Compare-CoreVersion { } function Get-MaxCoreVersion { - param([Parameter(Mandatory = $true)][object[]]$Records) + param([Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @()) $maxCore = $null foreach ($record in @($Records)) { @@ -272,7 +353,7 @@ function Convert-ReleaseToRecord { function Get-LatestSemVerRecordByChannel { param( - [Parameter(Mandatory = 
$true)][object[]]$Records, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @(), [Parameter(Mandatory = $true)][string]$Channel ) @@ -292,7 +373,7 @@ function Get-LatestSemVerRecordByChannel { function Get-MaxPrereleaseSequenceForCore { param( - [Parameter(Mandatory = $true)][object[]]$Records, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @(), [Parameter(Mandatory = $true)]$Core, [Parameter(Mandatory = $true)][string]$Channel ) @@ -316,7 +397,7 @@ function Get-MaxPrereleaseSequenceForCore { } function Resolve-CanaryTargetSemVer { - param([Parameter(Mandatory = $true)][object[]]$Records) + param([Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @()) $semverRecords = @($Records | Where-Object { [string]$_.tag_family -eq 'semver' }) $stableRecords = @($semverRecords | Where-Object { [string]$_.channel -eq 'stable' }) @@ -346,12 +427,14 @@ function Resolve-CanaryTargetSemVer { core = $targetCore prerelease_sequence = $nextCanarySequence tag = "v$(Format-CoreVersion -Core $targetCore)-canary.$nextCanarySequence" + skipped = $false + reason_code = '' } } function Resolve-PromotedTargetSemVer { param( - [Parameter(Mandatory = $true)][object[]]$Records, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @(), [Parameter(Mandatory = $true)][string]$TargetChannel, [Parameter(Mandatory = $true)]$SourceCore ) @@ -426,7 +509,10 @@ function Invoke-ReleaseMode { $migrationWarnings = @() if (@($legacyRecords).Count -gt 0) { - $migrationWarnings += "Legacy date-window release tags remain present in '$Repository'. Control-plane dispatch now targets SemVer channel tags." + if ($script:semverOnlyEnforced) { + throw "semver_only_enforcement_violation: semver_only_enforce_utc=$($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ')) legacy_tag_count=$(@($legacyRecords).Count)" + } + $migrationWarnings += "Legacy date-window release tags remain present in '$Repository'. 
Control-plane dispatch now targets SemVer channel tags and legacy compatibility ends at $($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ'))." } $sourceRecord = $null @@ -604,6 +690,9 @@ $report = [ordered]@{ keep_latest_canary_n = $KeepLatestCanaryN tag_strategy = 'semver' migration_mode = 'dual_mode_publish_semver_control_plane' + semver_policy_source = $script:semverPolicySource + semver_only_enforce_utc = $script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ') + semver_only_enforced = [bool]$script:semverOnlyEnforced status = 'fail' reason_code = '' message = '' diff --git a/scripts/Invoke-RollbackDrillSelfHealing.ps1 b/scripts/Invoke-RollbackDrillSelfHealing.ps1 index 5099441..63a4059 100644 --- a/scripts/Invoke-RollbackDrillSelfHealing.ps1 +++ b/scripts/Invoke-RollbackDrillSelfHealing.ps1 @@ -194,7 +194,7 @@ function Compare-CoreVersion { } function Get-MaxCoreVersion { - param([Parameter(Mandatory = $true)][object[]]$Records) + param([Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @()) $maxCore = $null foreach ($record in @($Records)) { diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 63575aa..8958e04 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -177,6 +177,10 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Scope 
'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_matches_signature_grace_end' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq ([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) -Detail ("semver_only_enforce_utc={0}; signature_grace_end_utc={1}" -f [string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc, [string]$releaseClient.signature_policy.grace_end_utc) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail 
([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 61584ee..c6dec73 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -101,6 +101,10 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_incident_auto_close_on_recovery' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Name 'ops_policy_incident_reopen_on_regression' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) Add-Check -Name 'ops_policy_incident_title_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Control Plane Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Name 'ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) + Add-Check -Name 
'ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) + Add-Check -Name 'ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) + Add-Check -Name 'ops_policy_tag_strategy_matches_signature_grace_end' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq ([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) -Detail ("semver_only_enforce_utc={0}; signature_grace_end_utc={1}" -f [string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc, [string]$releaseClient.signature_policy.grace_end_utc) Add-Check -Name 'ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Name 'ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Name 'ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 6ff6fe5..d0141a7 100644 --- 
a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -60,6 +60,10 @@ Describe 'Release client policy contract' { @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'release-control-plane' $releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery | Should -BeTrue $releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression | Should -BeTrue + $releaseClient.ops_control_plane_policy.tag_strategy.mode | Should -Be 'dual-mode-semver-preferred' + $releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family | Should -Be 'legacy_date_window' + ([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' + ([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be (([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops SLO Gate Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' @@ -93,6 +97,7 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' + $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_enabled' $script:policyScriptContent | Should -Match 
'ops_policy_self_healing_rollback_workflow' $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 0b6c9ff..008844b 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -49,7 +49,11 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match "ValidateSet\('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle'\)" $script:runtimeContent | Should -Match 'Resolve-CanaryTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-PromotedTargetSemVer' + $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' + $script:runtimeContent | Should -Match 'AllowEmptyCollection' $script:runtimeContent | Should -Match 'tag_strategy = ''semver''' + $script:runtimeContent | Should -Match 'semver_only_enforce_utc' + $script:runtimeContent | Should -Match 'semver_only_enforcement_violation' $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 8d18717..505c433 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -279,6 +279,10 @@ Describe 'Workspace surface contract' { (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.reopen_on_regression | Should 
-BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.mode | Should -Be 'dual-mode-semver-preferred' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.legacy_tag_family | Should -Be 'legacy_date_window' + ([DateTime]$script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' + ([DateTime]$script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be (([DateTime]$script:manifest.installer_contract.release_client.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 45b45a5..9e9161f 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -276,6 +276,11 @@ "Release Rollback Drill Alert" ] }, + "tag_strategy": { + "mode": "dual-mode-semver-preferred", + "legacy_tag_family": "legacy_date_window", + "semver_only_enforce_utc": "2026-07-01T00:00:00Z" + }, "self_healing": { "enabled": true, "max_attempts": 1, diff --git a/workspace-governance.json 
b/workspace-governance.json index 45b45a5..9e9161f 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -276,6 +276,11 @@ "Release Rollback Drill Alert" ] }, + "tag_strategy": { + "mode": "dual-mode-semver-preferred", + "legacy_tag_family": "legacy_date_window", + "semver_only_enforce_utc": "2026-07-01T00:00:00Z" + }, "self_healing": { "enabled": true, "max_attempts": 1, From 410e5677b5a6ce66b8dc3c386e892a7edaf4fb01 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 21:11:32 -0800 Subject: [PATCH 35/60] fix(control-plane): serialize prerelease dispatch input deterministically --- scripts/Invoke-ReleaseControlPlane.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index e2fbf72..dd418aa 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -628,7 +628,7 @@ function Invoke-ReleaseMode { $dispatchInputs = @( "release_tag=$targetTag", 'allow_existing_tag=false', - "prerelease=$([string]([bool]$modeConfig.prerelease).ToLowerInvariant())", + "prerelease=$(([string]([bool]$modeConfig.prerelease)).ToLowerInvariant())", "release_channel=$([string]$modeConfig.channel)" ) & $dispatchWorkflowScript ` From da6cb054af717de6b0c14d3eff3076c5795406e2 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 21:22:53 -0800 Subject: [PATCH 36/60] feat(control-plane): harden canary with release verification and mapped failures --- AGENTS.md | 12 ++ README.md | 18 +- .../runbooks/release-ops-incident-response.md | 48 +++++ scripts/Invoke-ReleaseControlPlane.ps1 | 197 +++++++++++++++--- ...easeControlPlaneWorkflowContract.Tests.ps1 | 6 + tests/ScopeAOpsRunbookContract.Tests.ps1 | 3 + 6 files changed, 252 insertions(+), 32 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9edc52a..6de6bf3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -229,6 +229,18 @@ Build and gate lanes must run in isolated 
workspaces on every run (`D:\dev` pref - When legacy tags are observed, control-plane execution must emit deterministic migration warnings. - After `semver_only_enforce_utc`, legacy tag presence must fail control-plane execution with `semver_only_enforcement_violation`. - Promotion must gate on source release integrity (required assets + source commit equals branch head). +- Control-plane must verify every dispatched release before completion: required release assets and `release-manifest.json` (`release_tag`, `channel`, provenance asset names). +- Release-control-plane top-level reason codes must remain explicit: + - `ops_health_gate_failed` + - `ops_unhealthy` + - `promotion_source_missing` + - `promotion_source_asset_missing` + - `promotion_source_not_at_head` + - `release_dispatch_watch_failed` + - `release_verification_failed` + - `canary_hygiene_failed` + - `semver_only_enforcement_violation` + - `control_plane_runtime_error` - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. - `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. - SLO self-healing reason codes must remain explicit: diff --git a/README.md b/README.md index a9598ef..6c9551b 100644 --- a/README.md +++ b/README.md @@ -393,9 +393,21 @@ Control-plane behavior: - prerelease: `vX.Y.Z-rc.N` (promoted from latest semver canary) - stable: `vX.Y.Z` (promoted from latest semver prerelease on Monday window) 3. Verifies run completion and promotion source integrity (`assets + source commit == branch head`). -4. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. -5. Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). -6. 
Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. +4. Performs post-dispatch release verification (`required assets + release-manifest channel/tag/provenance checks`). +5. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. +6. Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). +7. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. + +Top-level release-control-plane deterministic failure reason codes include: +- `ops_health_gate_failed` +- `ops_unhealthy` +- `promotion_source_missing` +- `promotion_source_asset_missing` +- `promotion_source_not_at_head` +- `release_dispatch_watch_failed` +- `release_verification_failed` +- `canary_hygiene_failed` +- `semver_only_enforcement_violation` `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index cca40a5..4231321 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -27,6 +27,10 @@ Reason code mapping: - `sync_guard_stale`: latest successful sync-guard run exceeded max-age policy. - `sync_guard_missing`: no sync-guard run found for branch. - `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. +- `release_dispatch_watch_failed`: release workflow dispatch completed but run conclusion was not `success`. +- `release_verification_failed`: post-dispatch release verification failed (missing assets or invalid `release-manifest.json` metadata). 
+- `canary_hygiene_failed`: SemVer canary retention cleanup failed after publish. +- `semver_only_enforcement_violation`: legacy date-window tags still present after SemVer-only enforcement gate. ## Runner Unavailable Remediation 1. Verify repository runner state: @@ -123,6 +127,50 @@ gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cde -f dry_run=true ``` +## Release Verification Failure Remediation +Use this when `reason_code=release_verification_failed` from `release-control-plane`. + +1. Download control-plane report and capture target tag: + +```powershell +gh run download <run_id> ` + -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -D .\tmp-rcp-report +Get-Content .\tmp-rcp-report\release-control-plane-report-<run_id>\release-control-plane-report.json -Raw +``` + +2. Verify release asset contract for the failed tag: + +```powershell +gh release view <tag> -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + --json tagName,isPrerelease,publishedAt,targetCommitish,assets,url +``` + +3. Verify `release-manifest.json` fields: + +```powershell +gh release download <tag> -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -p release-manifest.json -D .\tmp-release-manifest +Get-Content .\tmp-release-manifest\release-manifest.json -Raw +``` + +Expected minimum: +- `release_tag` equals `<tag>` +- `channel` matches release-control-plane mode/channel +- `provenance.assets` contains: + - `workspace-installer.spdx.json` + - `workspace-installer.slsa.json` + - `reproducibility-report.json` + +4.
Re-run canary cycle after remediation: + +```powershell +gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f mode=CanaryCycle ` + -f auto_remediate=true ` + -f dry_run=false +``` + ## SLO Gate Dispatch Run strict SLO gate with default 7-day window: diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index dd418aa..1bb3758 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -139,6 +139,135 @@ foreach ($warning in @($semverPolicy.warnings)) { Write-Warning "[semver_policy_warning] $warning" } +$script:releaseRequiredAssets = @( + 'lvie-cdev-workspace-installer.exe', + 'lvie-cdev-workspace-installer.exe.sha256', + 'reproducibility-report.json', + 'workspace-installer.spdx.json', + 'workspace-installer.slsa.json', + 'release-manifest.json' +) + +$script:releaseManifestRequiredProvenanceAssets = @( + 'workspace-installer.spdx.json', + 'workspace-installer.slsa.json', + 'reproducibility-report.json' +) + +function Resolve-ControlPlaneFailureReasonCode { + param([Parameter()][string]$MessageText = '') + + $message = [string]$MessageText + if ($message -match '^required_script_missing') { return 'required_script_missing' } + if ($message -match '^ops_health_gate_failed') { return 'ops_health_gate_failed' } + if ($message -match '^ops_unhealthy') { return 'ops_unhealthy' } + if ($message -match '^unsupported_mode_config|^unsupported_release_mode') { return 'unsupported_mode' } + if ($message -match '^semver_only_enforcement_violation') { return 'semver_only_enforcement_violation' } + if ($message -match '^promotion_source_missing') { return 'promotion_source_missing' } + if ($message -match '^promotion_source_not_prerelease') { return 'promotion_source_not_prerelease' } + if ($message -match '^promotion_source_asset_missing') { return 'promotion_source_asset_missing' } + if ($message -match '^promotion_source_commit_invalid') { return 
'promotion_source_commit_invalid' } + if ($message -match '^promotion_source_not_at_head') { return 'promotion_source_not_at_head' } + if ($message -match '^branch_head_unresolved') { return 'branch_head_unresolved' } + if ($message -match '^semver_prerelease_sequence_exhausted') { return 'semver_prerelease_sequence_exhausted' } + if ($message -match '^release_watch_failed|^release_watch_not_success') { return 'release_dispatch_watch_failed' } + if ($message -match '^release_verification_') { return 'release_verification_failed' } + if ($message -match '^canary_hygiene_failed') { return 'canary_hygiene_failed' } + if ($message -match '^gh_command_failed') { return 'gh_command_failed' } + + return 'control_plane_runtime_error' +} + +function Verify-DispatchedRelease { + param( + [Parameter(Mandatory = $true)][string]$TargetTag, + [Parameter(Mandatory = $true)][string]$ExpectedChannel, + [Parameter(Mandatory = $true)][bool]$ExpectedIsPrerelease, + [Parameter(Mandatory = $true)][string]$ModeName, + [Parameter(Mandatory = $true)][string]$ScratchRoot + ) + + $release = Invoke-GhJson -Arguments @( + 'release', 'view', + $TargetTag, + '-R', $Repository, + '--json', 'tagName,isPrerelease,targetCommitish,publishedAt,assets,url' + ) + if ($null -eq $release) { + throw "release_verification_release_missing: tag=$TargetTag" + } + + $actualTag = [string]$release.tagName + if ([string]::IsNullOrWhiteSpace($actualTag)) { + throw "release_verification_tag_missing: tag=$TargetTag" + } + if ($actualTag -ne $TargetTag) { + throw "release_verification_tag_mismatch: expected=$TargetTag actual=$actualTag" + } + if ([bool]$release.isPrerelease -ne $ExpectedIsPrerelease) { + throw "release_verification_prerelease_mismatch: tag=$TargetTag expected=$ExpectedIsPrerelease actual=$([bool]$release.isPrerelease)" + } + + $assetNames = @($release.assets | ForEach-Object { [string]$_.name }) + foreach ($requiredAsset in @($script:releaseRequiredAssets)) { + if ($assetNames -notcontains 
$requiredAsset) { + throw "release_verification_asset_missing: tag=$TargetTag asset=$requiredAsset" + } + } + + $manifestDownloadRoot = Join-Path $ScratchRoot "release-manifest-$ModeName-$($TargetTag -replace '[^A-Za-z0-9._-]', '_')" + New-Item -Path $manifestDownloadRoot -ItemType Directory -Force | Out-Null + & gh release download $TargetTag -R $Repository -p 'release-manifest.json' -D $manifestDownloadRoot + $downloadExit = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($downloadExit -ne 0) { + throw "release_verification_manifest_download_failed: tag=$TargetTag exit_code=$downloadExit" + } + + $manifestPath = Join-Path $manifestDownloadRoot 'release-manifest.json' + if (-not (Test-Path -LiteralPath $manifestPath -PathType Leaf)) { + throw "release_verification_manifest_missing: tag=$TargetTag" + } + + $manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json -Depth 100 + if ([string]$manifest.schema_version -ne '1.0') { + throw "release_verification_manifest_schema_invalid: tag=$TargetTag schema=$([string]$manifest.schema_version)" + } + if ([string]$manifest.repository -ne $Repository) { + throw "release_verification_manifest_repository_mismatch: tag=$TargetTag expected=$Repository actual=$([string]$manifest.repository)" + } + if ([string]$manifest.release_tag -ne $TargetTag) { + throw "release_verification_manifest_tag_mismatch: expected=$TargetTag actual=$([string]$manifest.release_tag)" + } + if ([string]$manifest.channel -ne $ExpectedChannel) { + throw "release_verification_manifest_channel_mismatch: tag=$TargetTag expected=$ExpectedChannel actual=$([string]$manifest.channel)" + } + if ([string]$manifest.installer.name -ne 'lvie-cdev-workspace-installer.exe') { + throw "release_verification_manifest_installer_name_mismatch: tag=$TargetTag actual=$([string]$manifest.installer.name)" + } + if ([string]::IsNullOrWhiteSpace([string]$manifest.installer.sha256)) { + throw 
"release_verification_manifest_installer_sha_missing: tag=$TargetTag" + } + + $provenanceAssetNames = @($manifest.provenance.assets | ForEach-Object { [string]$_.name }) + foreach ($requiredProvenanceAsset in @($script:releaseManifestRequiredProvenanceAssets)) { + if ($provenanceAssetNames -notcontains $requiredProvenanceAsset) { + throw "release_verification_manifest_provenance_missing: tag=$TargetTag asset=$requiredProvenanceAsset" + } + } + + return [ordered]@{ + status = 'pass' + tag = $TargetTag + channel = $ExpectedChannel + prerelease = $ExpectedIsPrerelease + release_url = [string]$release.url + published_at_utc = [string]$release.publishedAt + release_assets_checked = @($script:releaseRequiredAssets) + manifest_path = $manifestPath + manifest_provenance_assets_checked = @($script:releaseManifestRequiredProvenanceAssets) + } +} + function Get-ModeConfig { param([Parameter(Mandatory = $true)][string]$ModeName) @@ -493,10 +622,18 @@ function Invoke-ReleaseMode { param( [Parameter(Mandatory = $true)][string]$ModeName, [Parameter(Mandatory = $true)][string]$DateKey, - [Parameter(Mandatory = $true)][string]$ScratchRoot, - [Parameter(Mandatory = $true)][hashtable]$ExecutionReport + [Parameter(Mandatory = $true)][string]$ScratchRoot ) + $executionReport = [ordered]@{ + mode = $ModeName + source_release = $null + target_release = $null + dispatch = $null + release_verification = $null + hygiene = $null + } + $modeConfig = Get-ModeConfig -ModeName $ModeName $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) $allRecords = @( @@ -505,7 +642,6 @@ function Invoke-ReleaseMode { Where-Object { $null -ne $_ } ) $legacyRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'legacy_date_window' -and [string]$_.channel -ne 'unknown' }) - $semverRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'semver' }) $migrationWarnings = @() if (@($legacyRecords).Count -gt 0) { @@ -537,16 +673,8 @@ function 
Invoke-ReleaseMode { throw "promotion_source_not_prerelease: tag=$sourceTag channel=$([string]$modeConfig.source_channel_for_promotion)" } - $requiredAssets = @( - 'lvie-cdev-workspace-installer.exe', - 'lvie-cdev-workspace-installer.exe.sha256', - 'reproducibility-report.json', - 'workspace-installer.spdx.json', - 'workspace-installer.slsa.json', - 'release-manifest.json' - ) $assetNames = @($sourceRelease.assets | ForEach-Object { [string]$_.name }) - foreach ($requiredAsset in $requiredAssets) { + foreach ($requiredAsset in @($script:releaseRequiredAssets)) { if ($assetNames -notcontains $requiredAsset) { throw "promotion_source_asset_missing: tag=$sourceTag asset=$requiredAsset" } @@ -564,7 +692,7 @@ function Invoke-ReleaseMode { throw "promotion_source_not_at_head: tag=$sourceTag source_sha=$sourceCommit head_sha=$headSha" } - $ExecutionReport.source_release = [ordered]@{ + $executionReport.source_release = [ordered]@{ channel = [string]$modeConfig.source_channel_for_promotion tag = $sourceTag tag_family = 'semver' @@ -590,7 +718,7 @@ function Invoke-ReleaseMode { $targetTag = [string]$targetPlan.tag $targetCoreText = Format-CoreVersion -Core $targetPlan.core - $ExecutionReport.target_release = [ordered]@{ + $executionReport.target_release = [ordered]@{ mode = $ModeName channel = [string]$modeConfig.channel prerelease = [bool]$modeConfig.prerelease @@ -610,18 +738,18 @@ function Invoke-ReleaseMode { } if ([bool]$targetPlan.skipped) { - return + return $executionReport } if ($DryRun) { - $ExecutionReport.dispatch = [ordered]@{ + $executionReport.dispatch = [ordered]@{ status = 'skipped_dry_run' workflow = $ReleaseWorkflowFile branch = $Branch run_id = '' url = '' } - return + return $executionReport } $dispatchReportPath = Join-Path $ScratchRoot "$ModeName-dispatch.json" @@ -650,7 +778,12 @@ function Invoke-ReleaseMode { } $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop - $ExecutionReport.dispatch = [ordered]@{ 
+ $watchConclusion = [string]$watchReport.conclusion + if ($watchConclusion -ne 'success') { + throw "release_watch_not_success: mode=$ModeName run_id=$([string]$dispatchReport.run_id) conclusion=$watchConclusion" + } + + $executionReport.dispatch = [ordered]@{ status = 'success' workflow = $ReleaseWorkflowFile branch = $Branch @@ -659,6 +792,13 @@ function Invoke-ReleaseMode { conclusion = [string]$watchReport.conclusion } + $executionReport.release_verification = Verify-DispatchedRelease ` + -TargetTag $targetTag ` + -ExpectedChannel ([string]$modeConfig.channel) ` + -ExpectedIsPrerelease ([bool]$modeConfig.prerelease) ` + -ModeName $ModeName ` + -ScratchRoot $ScratchRoot + if ($ModeName -eq 'CanaryCycle') { $hygienePath = Join-Path $ScratchRoot 'canary-hygiene.json' & pwsh -NoProfile -File $canaryHygieneScript ` @@ -671,8 +811,10 @@ function Invoke-ReleaseMode { if ($LASTEXITCODE -ne 0) { throw "canary_hygiene_failed: tag_family=semver date=$DateKey exit_code=$LASTEXITCODE" } - $ExecutionReport.hygiene = Get-Content -LiteralPath $hygienePath -Raw | ConvertFrom-Json -ErrorAction Stop + $executionReport.hygiene = Get-Content -LiteralPath $hygienePath -Raw | ConvertFrom-Json -ErrorAction Stop } + + return $executionReport } $scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("release-control-plane-" + [Guid]::NewGuid().ToString('N')) @@ -758,12 +900,10 @@ try { $executionList = [System.Collections.Generic.List[object]]::new() if ($Mode -eq 'FullCycle') { - $canaryExec = [ordered]@{} - Invoke-ReleaseMode -ModeName 'CanaryCycle' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $canaryExec + $canaryExec = Invoke-ReleaseMode -ModeName 'CanaryCycle' -DateKey $dateKey -ScratchRoot $scratchRoot [void]$executionList.Add($canaryExec) - $prereleaseExec = [ordered]@{} - Invoke-ReleaseMode -ModeName 'PromotePrerelease' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $prereleaseExec + $prereleaseExec = Invoke-ReleaseMode -ModeName 
'PromotePrerelease' -DateKey $dateKey -ScratchRoot $scratchRoot [void]$executionList.Add($prereleaseExec) $stableExec = [ordered]@{ @@ -776,13 +916,11 @@ try { } $dayOfWeekUtc = (Get-Date).ToUniversalTime().DayOfWeek.ToString() if ($dayOfWeekUtc -eq 'Monday') { - $stableExec = [ordered]@{} - Invoke-ReleaseMode -ModeName 'PromoteStable' -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $stableExec + $stableExec = Invoke-ReleaseMode -ModeName 'PromoteStable' -DateKey $dateKey -ScratchRoot $scratchRoot } [void]$executionList.Add($stableExec) } else { - $singleExec = [ordered]@{} - Invoke-ReleaseMode -ModeName $Mode -DateKey $dateKey -ScratchRoot $scratchRoot -ExecutionReport $singleExec + $singleExec = Invoke-ReleaseMode -ModeName $Mode -DateKey $dateKey -ScratchRoot $scratchRoot [void]$executionList.Add($singleExec) } @@ -793,9 +931,10 @@ try { } } catch { + $failureMessage = [string]$_.Exception.Message $report.status = 'fail' - $report.reason_code = 'control_plane_failed' - $report.message = [string]$_.Exception.Message + $report.reason_code = Resolve-ControlPlaneFailureReasonCode -MessageText $failureMessage + $report.message = $failureMessage } finally { Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 008844b..593d565 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -50,11 +50,17 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'Resolve-CanaryTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-PromotedTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' + $script:runtimeContent | Should -Match 'Resolve-ControlPlaneFailureReasonCode' + $script:runtimeContent | Should -Match 'Verify-DispatchedRelease' $script:runtimeContent | Should -Match 
'AllowEmptyCollection' $script:runtimeContent | Should -Match 'tag_strategy = ''semver''' $script:runtimeContent | Should -Match 'semver_only_enforce_utc' $script:runtimeContent | Should -Match 'semver_only_enforcement_violation' $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' + $script:runtimeContent | Should -Match 'release_watch_not_success' + $script:runtimeContent | Should -Match 'release_verification_asset_missing' + $script:runtimeContent | Should -Match 'release_verification_manifest_channel_mismatch' + $script:runtimeContent | Should -Match 'release_verification_failed' $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' $script:runtimeContent | Should -Match 'promotion_source_not_at_head' diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index df0f133..d436058 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -32,6 +32,9 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'release-rollback-drill\.yml' $script:runbookContent | Should -Match 'auto_self_heal=false' $script:runbookContent | Should -Match '20260226' + $script:runbookContent | Should -Match 'release_verification_failed' + $script:runbookContent | Should -Match 'release-manifest\.json' + $script:runbookContent | Should -Match 'release_dispatch_watch_failed' } It 'keeps README and AGENTS aligned to Scope A workflows' { From 6535c5a76a590b1350266862c88640e1b36bf241 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 21:27:48 -0800 Subject: [PATCH 37/60] fix(control-plane): suppress child stdout in execution report path --- scripts/Invoke-ReleaseControlPlane.ps1 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 1bb3758..53ffb4c 
100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -764,7 +764,7 @@ function Invoke-ReleaseMode { -WorkflowFile $ReleaseWorkflowFile ` -Branch $Branch ` -Inputs $dispatchInputs ` - -OutputPath $dispatchReportPath + -OutputPath $dispatchReportPath | Out-Null $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop $watchReportPath = Join-Path $ScratchRoot "$ModeName-watch.json" @@ -772,7 +772,7 @@ function Invoke-ReleaseMode { -Repository $Repository ` -RunId ([string]$dispatchReport.run_id) ` -TimeoutMinutes $WatchTimeoutMinutes ` - -OutputPath $watchReportPath + -OutputPath $watchReportPath | Out-Null if ($LASTEXITCODE -ne 0) { throw "release_watch_failed: mode=$ModeName run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" } @@ -807,7 +807,7 @@ function Invoke-ReleaseMode { -TagFamily semver ` -KeepLatestN $KeepLatestCanaryN ` -Delete ` - -OutputPath $hygienePath + -OutputPath $hygienePath | Out-Null if ($LASTEXITCODE -ne 0) { throw "canary_hygiene_failed: tag_family=semver date=$DateKey exit_code=$LASTEXITCODE" } From d38ce2cc7e5f54ef92e666b5c1874e8ac476e1d6 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 21:37:29 -0800 Subject: [PATCH 38/60] feat(control-plane): harden promotion lineage verification paths --- AGENTS.md | 3 + README.md | 10 +- .../runbooks/release-ops-incident-response.md | 1 + scripts/Invoke-ReleaseControlPlane.ps1 | 121 ++++++++++++++++++ ...easeControlPlaneWorkflowContract.Tests.ps1 | 2 + tests/ScopeAOpsRunbookContract.Tests.ps1 | 1 + 6 files changed, 135 insertions(+), 3 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6de6bf3..8ea30de 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -230,12 +230,15 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - After `semver_only_enforce_utc`, legacy tag presence must fail control-plane execution with `semver_only_enforcement_violation`. 
- Promotion must gate on source release integrity (required assets + source commit equals branch head). - Control-plane must verify every dispatched release before completion: required release assets and `release-manifest.json` (`release_tag`, `channel`, provenance asset names). +- Control-plane must verify promotion lineage for `PromotePrerelease` and `PromoteStable`: source/target channel, SemVer core equality, and source/target commit SHA equality. - Release-control-plane top-level reason codes must remain explicit: - `ops_health_gate_failed` - `ops_unhealthy` - `promotion_source_missing` + - `promotion_source_not_prerelease` - `promotion_source_asset_missing` - `promotion_source_not_at_head` + - `promotion_lineage_invalid` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` diff --git a/README.md b/README.md index 6c9551b..8e0567e 100644 --- a/README.md +++ b/README.md @@ -394,20 +394,24 @@ Control-plane behavior: - stable: `vX.Y.Z` (promoted from latest semver prerelease on Monday window) 3. Verifies run completion and promotion source integrity (`assets + source commit == branch head`). 4. Performs post-dispatch release verification (`required assets + release-manifest channel/tag/provenance checks`). -5. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. -6. Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). -7. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. +5. Verifies promotion lineage for `PromotePrerelease` and `PromoteStable` (`source/target channel + SemVer core + commit SHA`). +6. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. +7. 
Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). +8. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. Top-level release-control-plane deterministic failure reason codes include: - `ops_health_gate_failed` - `ops_unhealthy` - `promotion_source_missing` +- `promotion_source_not_prerelease` - `promotion_source_asset_missing` - `promotion_source_not_at_head` +- `promotion_lineage_invalid` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` - `semver_only_enforcement_violation` +- `control_plane_runtime_error` `weekly-ops-slo-report.yml` emits machine-readable weekly SLO evidence via `scripts/Write-OpsSloReport.ps1`. diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 4231321..8db0cca 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -27,6 +27,7 @@ Reason code mapping: - `sync_guard_stale`: latest successful sync-guard run exceeded max-age policy. - `sync_guard_missing`: no sync-guard run found for branch. - `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. +- `promotion_lineage_invalid`: promotion source/target channel, SemVer core, or commit-SHA lineage check failed. - `release_dispatch_watch_failed`: release workflow dispatch completed but run conclusion was not `success`. - `release_verification_failed`: post-dispatch release verification failed (missing assets or invalid `release-manifest.json` metadata). - `canary_hygiene_failed`: SemVer canary retention cleanup failed after publish. 
diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 53ffb4c..92a4bca 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -168,6 +168,7 @@ function Resolve-ControlPlaneFailureReasonCode { if ($message -match '^promotion_source_asset_missing') { return 'promotion_source_asset_missing' } if ($message -match '^promotion_source_commit_invalid') { return 'promotion_source_commit_invalid' } if ($message -match '^promotion_source_not_at_head') { return 'promotion_source_not_at_head' } + if ($message -match '^promotion_lineage_invalid') { return 'promotion_lineage_invalid' } if ($message -match '^branch_head_unresolved') { return 'branch_head_unresolved' } if ($message -match '^semver_prerelease_sequence_exhausted') { return 'semver_prerelease_sequence_exhausted' } if ($message -match '^release_watch_failed|^release_watch_not_success') { return 'release_dispatch_watch_failed' } @@ -208,6 +209,14 @@ function Verify-DispatchedRelease { throw "release_verification_prerelease_mismatch: tag=$TargetTag expected=$ExpectedIsPrerelease actual=$([bool]$release.isPrerelease)" } + $parsedTagRecord = Convert-ReleaseToRecord -Release $release + if ($null -eq $parsedTagRecord -or [string]$parsedTagRecord.tag_family -ne 'semver') { + throw "release_verification_tag_not_semver: tag=$TargetTag" + } + if ([string]$parsedTagRecord.channel -ne $ExpectedChannel) { + throw "release_verification_tag_channel_mismatch: tag=$TargetTag expected=$ExpectedChannel actual=$([string]$parsedTagRecord.channel)" + } + $assetNames = @($release.assets | ForEach-Object { [string]$_.name }) foreach ($requiredAsset in @($script:releaseRequiredAssets)) { if ($assetNames -notcontains $requiredAsset) { @@ -259,15 +268,120 @@ function Verify-DispatchedRelease { status = 'pass' tag = $TargetTag channel = $ExpectedChannel + tag_family = 'semver' + core = "{0}.{1}.{2}" -f [int]$parsedTagRecord.major, 
[int]$parsedTagRecord.minor, [int]$parsedTagRecord.patch + prerelease_sequence = [int]$parsedTagRecord.prerelease_sequence prerelease = $ExpectedIsPrerelease + target_commitish = [string]$release.targetCommitish release_url = [string]$release.url published_at_utc = [string]$release.publishedAt release_assets_checked = @($script:releaseRequiredAssets) manifest_path = $manifestPath + manifest_channel = [string]$manifest.channel + manifest_release_tag = [string]$manifest.release_tag manifest_provenance_assets_checked = @($script:releaseManifestRequiredProvenanceAssets) } } +function Verify-PromotionLineage { + param( + [Parameter(Mandatory = $true)][string]$ModeName, + [Parameter()][AllowNull()]$SourceRelease, + [Parameter()][AllowNull()]$ReleaseVerification + ) + + if ($ModeName -ne 'PromotePrerelease' -and $ModeName -ne 'PromoteStable') { + return [ordered]@{ + status = 'skipped' + mode = $ModeName + reason_code = 'not_promotion_mode' + } + } + + if ($null -eq $SourceRelease) { + throw "promotion_lineage_invalid: mode=$ModeName reason=source_release_missing" + } + if ($null -eq $ReleaseVerification) { + throw "promotion_lineage_invalid: mode=$ModeName reason=release_verification_missing" + } + + $sourceCore = ([string]$SourceRelease.core).Trim() + $targetCore = ([string]$ReleaseVerification.core).Trim() + if ([string]::IsNullOrWhiteSpace($sourceCore) -or [string]::IsNullOrWhiteSpace($targetCore)) { + throw "promotion_lineage_invalid: mode=$ModeName reason=core_missing source_core=$sourceCore target_core=$targetCore" + } + if ($sourceCore -ne $targetCore) { + throw "promotion_lineage_invalid: mode=$ModeName reason=core_mismatch source_core=$sourceCore target_core=$targetCore" + } + + $sourceSha = ([string]$SourceRelease.source_sha).Trim().ToLowerInvariant() + $targetSha = ([string]$ReleaseVerification.target_commitish).Trim().ToLowerInvariant() + if ($sourceSha -notmatch '^[0-9a-f]{40}$') { + throw "promotion_lineage_invalid: mode=$ModeName reason=source_sha_invalid 
source_sha=$sourceSha" + } + if ($targetSha -notmatch '^[0-9a-f]{40}$') { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_sha_invalid target_sha=$targetSha" + } + if ($sourceSha -ne $targetSha) { + throw "promotion_lineage_invalid: mode=$ModeName reason=sha_mismatch source_sha=$sourceSha target_sha=$targetSha" + } + + $sourceChannel = [string]$SourceRelease.channel + $targetChannel = [string]$ReleaseVerification.channel + $sourcePrereleaseSequence = [int]$SourceRelease.prerelease_sequence + $targetPrereleaseSequence = [int]$ReleaseVerification.prerelease_sequence + $targetIsPrerelease = [bool]$ReleaseVerification.prerelease + + if ($ModeName -eq 'PromotePrerelease') { + if ($sourceChannel -ne 'canary') { + throw "promotion_lineage_invalid: mode=$ModeName reason=source_channel_invalid source_channel=$sourceChannel" + } + if ($targetChannel -ne 'prerelease') { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_channel_invalid target_channel=$targetChannel" + } + if (-not $targetIsPrerelease) { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_prerelease_false" + } + if ($sourcePrereleaseSequence -lt 1) { + throw "promotion_lineage_invalid: mode=$ModeName reason=source_sequence_invalid source_sequence=$sourcePrereleaseSequence" + } + if ($targetPrereleaseSequence -lt 1) { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_sequence_invalid target_sequence=$targetPrereleaseSequence" + } + } + + if ($ModeName -eq 'PromoteStable') { + if ($sourceChannel -ne 'prerelease') { + throw "promotion_lineage_invalid: mode=$ModeName reason=source_channel_invalid source_channel=$sourceChannel" + } + if ($targetChannel -ne 'stable') { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_channel_invalid target_channel=$targetChannel" + } + if ($targetIsPrerelease) { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_prerelease_true" + } + if ($sourcePrereleaseSequence -lt 1) { + throw 
"promotion_lineage_invalid: mode=$ModeName reason=source_sequence_invalid source_sequence=$sourcePrereleaseSequence" + } + if ($targetPrereleaseSequence -ne 0) { + throw "promotion_lineage_invalid: mode=$ModeName reason=target_sequence_invalid target_sequence=$targetPrereleaseSequence" + } + } + + return [ordered]@{ + status = 'pass' + mode = $ModeName + source_tag = [string]$SourceRelease.tag + source_channel = $sourceChannel + source_core = $sourceCore + source_sha = $sourceSha + target_tag = [string]$ReleaseVerification.tag + target_channel = $targetChannel + target_core = $targetCore + target_sha = $targetSha + } +} + function Get-ModeConfig { param([Parameter(Mandatory = $true)][string]$ModeName) @@ -631,6 +745,7 @@ function Invoke-ReleaseMode { target_release = $null dispatch = $null release_verification = $null + promotion_lineage = $null hygiene = $null } @@ -698,6 +813,7 @@ function Invoke-ReleaseMode { tag_family = 'semver' core = Format-CoreVersion -Core $sourceCore prerelease_sequence = [int]$sourceRecord.prerelease_sequence + prerelease = [bool]$sourceRelease.isPrerelease source_sha = $sourceCommit head_sha = $headSha url = [string]$sourceRelease.url @@ -799,6 +915,11 @@ function Invoke-ReleaseMode { -ModeName $ModeName ` -ScratchRoot $ScratchRoot + $executionReport.promotion_lineage = Verify-PromotionLineage ` + -ModeName $ModeName ` + -SourceRelease $executionReport.source_release ` + -ReleaseVerification $executionReport.release_verification + if ($ModeName -eq 'CanaryCycle') { $hygienePath = Join-Path $ScratchRoot 'canary-hygiene.json' & pwsh -NoProfile -File $canaryHygieneScript ` diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 593d565..e53c88d 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -52,6 +52,7 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should 
-Match 'Resolve-SemVerEnforcementPolicy' $script:runtimeContent | Should -Match 'Resolve-ControlPlaneFailureReasonCode' $script:runtimeContent | Should -Match 'Verify-DispatchedRelease' + $script:runtimeContent | Should -Match 'Verify-PromotionLineage' $script:runtimeContent | Should -Match 'AllowEmptyCollection' $script:runtimeContent | Should -Match 'tag_strategy = ''semver''' $script:runtimeContent | Should -Match 'semver_only_enforce_utc' @@ -61,6 +62,7 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'release_verification_asset_missing' $script:runtimeContent | Should -Match 'release_verification_manifest_channel_mismatch' $script:runtimeContent | Should -Match 'release_verification_failed' + $script:runtimeContent | Should -Match 'promotion_lineage_invalid' $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' $script:runtimeContent | Should -Match 'promotion_source_not_at_head' diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index d436058..fdd8df0 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -33,6 +33,7 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'auto_self_heal=false' $script:runbookContent | Should -Match '20260226' $script:runbookContent | Should -Match 'release_verification_failed' + $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'release-manifest\.json' $script:runbookContent | Should -Match 'release_dispatch_watch_failed' } From a528de558e0db42d14e6a21b9deb02068d1d165c Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 22:09:15 -0800 Subject: [PATCH 39/60] Harden stable promotion window policy and audited override --- .github/workflows/release-control-plane.yml | 23 ++ AGENTS.md | 11 + README.md | 10 +- 
.../runbooks/release-ops-incident-response.md | 12 + scripts/Exercise-ReleaseControlPlaneLocal.ps1 | 8 + scripts/Invoke-ReleaseControlPlane.ps1 | 230 +++++++++++++++++- .../Invoke-ReleaseControlPlaneLocalDocker.ps1 | 14 ++ scripts/Test-PolicyContracts.ps1 | 4 + scripts/Test-ReleaseClientContracts.ps1 | 4 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 9 + .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 1 + tests/ReleaseClientPolicyContract.Tests.ps1 | 5 + ...easeControlPlaneWorkflowContract.Tests.ps1 | 7 + tests/ScopeAOpsRunbookContract.Tests.ps1 | 2 + tests/WorkspaceSurfaceContract.Tests.ps1 | 4 + .../workspace-governance.json | 8 + workspace-governance.json | 8 + 17 files changed, 355 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 9cffbe3..89d16c3 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -31,6 +31,16 @@ on: required: false default: '1' type: string + force_stable_promotion_outside_window: + description: Force stable promotion outside policy window for FullCycle (audited). + required: false + default: false + type: boolean + force_stable_promotion_reason: + description: Audit reason when forcing stable promotion outside policy window. + required: false + default: '' + type: string dry_run: description: Run planning and health gates only; do not dispatch releases. 
required: false @@ -104,6 +114,17 @@ jobs: $dryRun = [System.Convert]::ToBoolean($dryRunText) } + $forceStablePromotionOutsideWindowText = [string]'${{ inputs.force_stable_promotion_outside_window }}' + $forceStablePromotionOutsideWindow = $false + if (-not [string]::IsNullOrWhiteSpace($forceStablePromotionOutsideWindowText)) { + $forceStablePromotionOutsideWindow = [System.Convert]::ToBoolean($forceStablePromotionOutsideWindowText) + } + + $forceStablePromotionReason = [string]'${{ inputs.force_stable_promotion_reason }}' + if ($null -eq $forceStablePromotionReason) { + $forceStablePromotionReason = '' + } + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseControlPlane.ps1 ` -Repository '${{ github.repository }}' ` -Branch 'main' ` @@ -111,6 +132,8 @@ jobs: -SyncGuardMaxAgeHours $syncGuardAgeHours ` -KeepLatestCanaryN $keepLatestCanaryN ` -AutoRemediate:$autoRemediate ` + -ForceStablePromotionOutsideWindow:$forceStablePromotionOutsideWindow ` + -ForceStablePromotionReason $forceStablePromotionReason ` -DryRun:$dryRun ` -OutputPath $reportPath diff --git a/AGENTS.md b/AGENTS.md index 8ea30de..a5d754f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -223,6 +223,15 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - canary: `vX.Y.Z-canary.N` - prerelease: `vX.Y.Z-rc.N` (promoted from semver canary) - stable: `vX.Y.Z` (promoted from semver prerelease) +- Stable promotion window policy must be explicit under `ops_control_plane_policy.stable_promotion_window`: + - `full_cycle_allowed_utc_weekdays` + - `allow_outside_window_with_override` + - `override_reason_required` + - `override_reason_min_length` +- Full-cycle stable promotion must evaluate stable window policy and record deterministic decision codes (`stable_window_open`, `stable_window_closed`, `stable_window_override_applied`) in execution report metadata. 
+- Emergency stable override is workflow-dispatch only and audited: + - `force_stable_promotion_outside_window` + - `force_stable_promotion_reason` - Release-control-plane canary hygiene invocation must enforce `TagFamily=semver`. - Legacy date-window tags (`v0.YYYYMMDD.N`) may still exist during migration but are non-canonical for control-plane dispatch. - Control-plane tag strategy policy must define `ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`) and keep it aligned with signature grace-end during dual-mode transition. @@ -239,6 +248,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `promotion_source_asset_missing` - `promotion_source_not_at_head` - `promotion_lineage_invalid` + - `stable_window_override_invalid` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` @@ -269,6 +279,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `runtime_images_missing` - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` + - `ops_control_plane_stable_window_missing` - `policy_drift_runtime_error` - `.github/workflows/release-rollback-drill.yml` must run `scripts/Invoke-RollbackDrillSelfHealing.ps1`. - Rollback self-healing reason codes must remain explicit: diff --git a/README.md b/README.md index 8e0567e..f3e7734 100644 --- a/README.md +++ b/README.md @@ -391,13 +391,17 @@ Control-plane behavior: 2. Dispatches release workflow with deterministic SemVer channel tags: - canary: `vX.Y.Z-canary.N` - prerelease: `vX.Y.Z-rc.N` (promoted from latest semver canary) - - stable: `vX.Y.Z` (promoted from latest semver prerelease on Monday window) + - stable: `vX.Y.Z` (promoted from latest semver prerelease during policy window) 3. Verifies run completion and promotion source integrity (`assets + source commit == branch head`). 4. 
Performs post-dispatch release verification (`required assets + release-manifest channel/tag/provenance checks`). 5. Verifies promotion lineage for `PromotePrerelease` and `PromoteStable` (`source/target channel + SemVer core + commit SHA`). 6. Applies canary smoke tag hygiene with `tag_family=semver` after canary publish. 7. Reads SemVer gate policy from `installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`). -8. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. +8. Reads stable promotion window policy from `installer_contract.release_client.ops_control_plane_policy.stable_promotion_window` (default: full-cycle Mondays only, override allowed with audited reason). +9. Supports manual emergency override for FullCycle stable promotion via workflow_dispatch inputs: + - `force_stable_promotion_outside_window=true` + - `force_stable_promotion_reason=` +10. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. Top-level release-control-plane deterministic failure reason codes include: - `ops_health_gate_failed` @@ -407,6 +411,7 @@ Top-level release-control-plane deterministic failure reason codes include: - `promotion_source_asset_missing` - `promotion_source_not_at_head` - `promotion_lineage_invalid` +- `stable_window_override_invalid` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` @@ -441,6 +446,7 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `runtime_images_missing` - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` + - `ops_control_plane_stable_window_missing` `release-rollback-drill.yml` is scheduled daily and supports manual dispatch. 
It runs `scripts/Invoke-RollbackDrillSelfHealing.ps1` to validate deterministic rollback readiness: - channel-scoped latest/previous release candidates diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 8db0cca..e126195 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -28,6 +28,7 @@ Reason code mapping: - `sync_guard_missing`: no sync-guard run found for branch. - `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. - `promotion_lineage_invalid`: promotion source/target channel, SemVer core, or commit-SHA lineage check failed. +- `stable_window_override_invalid`: requested stable override violated stable window policy (override disabled, missing reason, or reason too short). - `release_dispatch_watch_failed`: release workflow dispatch completed but run conclusion was not `success`. - `release_verification_failed`: post-dispatch release verification failed (missing assets or invalid `release-manifest.json` metadata). - `canary_hygiene_failed`: SemVer canary retention cleanup failed after publish. 
@@ -120,6 +121,17 @@ gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cde -f dry_run=false ``` +Force stable promotion outside window (audited emergency path): + +```powershell +gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f mode=FullCycle ` + -f force_stable_promotion_outside_window=true ` + -f force_stable_promotion_reason="Emergency promotion after incident remediation" ` + -f auto_remediate=true ` + -f dry_run=false +``` + Run validation-only health/policy gate: ```powershell diff --git a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 index 678373a..a857931 100644 --- a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 +++ b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 @@ -30,6 +30,12 @@ param( [Parameter()] [switch]$DryRun, + [Parameter()] + [bool]$ForceStablePromotionOutsideWindow = $false, + + [Parameter()] + [string]$ForceStablePromotionReason = '', + [Parameter()] [switch]$AllowMutatingModes, @@ -132,6 +138,8 @@ try { -SyncGuardMaxAgeHours $SyncGuardMaxAgeHours ` -KeepLatestCanaryN $KeepLatestCanaryN ` -AutoRemediate:$false ` + -ForceStablePromotionOutsideWindow:$ForceStablePromotionOutsideWindow ` + -ForceStablePromotionReason $ForceStablePromotionReason ` -DryRun:$DryRun ` -OutputPath $controlPlanePath if ($LASTEXITCODE -ne 0) { diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 92a4bca..9bb5d23 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -35,6 +35,12 @@ param( [Parameter()] [switch]$DryRun, + [Parameter()] + [bool]$ForceStablePromotionOutsideWindow = $false, + + [Parameter()] + [string]$ForceStablePromotionReason = '', + [Parameter()] [string]$OutputPath = '' ) @@ -129,6 +135,130 @@ function Resolve-SemVerEnforcementPolicy { return $policy } +function Resolve-StablePromotionWindowPolicy { + param( + [Parameter(Mandatory = 
$true)][string]$ManifestPath + ) + + $warnings = [System.Collections.Generic.List[string]]::new() + $validWeekdays = @('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday') + $policy = [ordered]@{ + full_cycle_allowed_utc_weekdays = @('Monday') + allow_outside_window_with_override = $true + override_reason_required = $true + override_reason_min_length = 12 + source = 'default' + warnings = @() + } + + if (-not (Test-Path -LiteralPath $ManifestPath -PathType Leaf)) { + [void]$warnings.Add("workspace_governance_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + try { + $manifest = Get-Content -LiteralPath $ManifestPath -Raw | ConvertFrom-Json -Depth 100 + $candidateWindow = $manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window + if ($null -eq $candidateWindow) { + [void]$warnings.Add("stable_promotion_window_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + $policy.source = 'workspace_governance' + + $candidateWeekdays = @($candidateWindow.full_cycle_allowed_utc_weekdays) + $normalizedWeekdays = [System.Collections.Generic.List[string]]::new() + foreach ($candidateWeekday in @($candidateWeekdays)) { + $value = ([string]$candidateWeekday).Trim() + if ([string]::IsNullOrWhiteSpace($value)) { + continue + } + + $canonical = @( + $validWeekdays | + Where-Object { [string]::Equals([string]$_, $value, [System.StringComparison]::OrdinalIgnoreCase) } | + Select-Object -First 1 + ) + if (@($canonical).Count -eq 1) { + $day = [string]$canonical[0] + if (-not $normalizedWeekdays.Contains($day)) { + [void]$normalizedWeekdays.Add($day) + } + } else { + [void]$warnings.Add("stable_promotion_window_invalid_weekday: value=$value") + } + } + if ($normalizedWeekdays.Count -gt 0) { + $policy.full_cycle_allowed_utc_weekdays = @($normalizedWeekdays) + } else { + [void]$warnings.Add('stable_promotion_window_weekdays_missing_or_invalid') + } + + 
$allowOverride = $candidateWindow.allow_outside_window_with_override + if ($allowOverride -is [bool]) { + $policy.allow_outside_window_with_override = [bool]$allowOverride + } elseif ($null -ne $allowOverride) { + $parsedAllowOverride = $false + $allowOverrideParsed = $false + try { + $parsedAllowOverride = [System.Convert]::ToBoolean([string]$allowOverride, [Globalization.CultureInfo]::InvariantCulture) + $allowOverrideParsed = $true + } catch { + $allowOverrideParsed = $false + } + + if ($allowOverrideParsed) { + $policy.allow_outside_window_with_override = $parsedAllowOverride + } else { + [void]$warnings.Add("stable_promotion_window_allow_override_invalid: value=$allowOverride") + } + } else { + [void]$warnings.Add('stable_promotion_window_allow_override_missing') + } + + $reasonRequired = $candidateWindow.override_reason_required + if ($reasonRequired -is [bool]) { + $policy.override_reason_required = [bool]$reasonRequired + } elseif ($null -ne $reasonRequired) { + $parsedReasonRequired = $false + $reasonRequiredParsed = $false + try { + $parsedReasonRequired = [System.Convert]::ToBoolean([string]$reasonRequired, [Globalization.CultureInfo]::InvariantCulture) + $reasonRequiredParsed = $true + } catch { + $reasonRequiredParsed = $false + } + + if ($reasonRequiredParsed) { + $policy.override_reason_required = $parsedReasonRequired + } else { + [void]$warnings.Add("stable_promotion_window_reason_required_invalid: value=$reasonRequired") + } + } else { + [void]$warnings.Add('stable_promotion_window_reason_required_missing') + } + + $reasonMinLength = $candidateWindow.override_reason_min_length + if ($null -ne $reasonMinLength) { + $parsedMinLength = -1 + if ([int]::TryParse(([string]$reasonMinLength).Trim(), [ref]$parsedMinLength) -and $parsedMinLength -ge 0 -and $parsedMinLength -le 512) { + $policy.override_reason_min_length = $parsedMinLength + } else { + [void]$warnings.Add("stable_promotion_window_reason_min_length_invalid: value=$reasonMinLength") + } + } 
else { + [void]$warnings.Add('stable_promotion_window_reason_min_length_missing') + } + } catch { + [void]$warnings.Add("stable_promotion_window_policy_load_failed: $([string]$_.Exception.Message)") + } + + $policy.warnings = @($warnings) + return $policy +} + $defaultSemverOnlyEnforceUtc = [DateTimeOffset]::Parse('2026-07-01T00:00:00Z') $workspaceGovernancePath = Join-Path (Split-Path -Parent $PSScriptRoot) 'workspace-governance.json' $semverPolicy = Resolve-SemVerEnforcementPolicy -ManifestPath $workspaceGovernancePath -FallbackEnforceUtc $defaultSemverOnlyEnforceUtc @@ -139,6 +269,16 @@ foreach ($warning in @($semverPolicy.warnings)) { Write-Warning "[semver_policy_warning] $warning" } +$stablePromotionWindowPolicy = Resolve-StablePromotionWindowPolicy -ManifestPath $workspaceGovernancePath +$script:stablePromotionWindowPolicySource = [string]$stablePromotionWindowPolicy.source +$script:stablePromotionFullCycleAllowedUtcWeekdays = @($stablePromotionWindowPolicy.full_cycle_allowed_utc_weekdays) +$script:stablePromotionAllowOutsideWindowWithOverride = [bool]$stablePromotionWindowPolicy.allow_outside_window_with_override +$script:stablePromotionOverrideReasonRequired = [bool]$stablePromotionWindowPolicy.override_reason_required +$script:stablePromotionOverrideReasonMinLength = [int]$stablePromotionWindowPolicy.override_reason_min_length +foreach ($warning in @($stablePromotionWindowPolicy.warnings)) { + Write-Warning "[stable_promotion_window_policy_warning] $warning" +} + $script:releaseRequiredAssets = @( 'lvie-cdev-workspace-installer.exe', 'lvie-cdev-workspace-installer.exe.sha256', @@ -169,6 +309,7 @@ function Resolve-ControlPlaneFailureReasonCode { if ($message -match '^promotion_source_commit_invalid') { return 'promotion_source_commit_invalid' } if ($message -match '^promotion_source_not_at_head') { return 'promotion_source_not_at_head' } if ($message -match '^promotion_lineage_invalid') { return 'promotion_lineage_invalid' } + if ($message -match 
'^stable_window_override_') { return 'stable_window_override_invalid' } if ($message -match '^branch_head_unresolved') { return 'branch_head_unresolved' } if ($message -match '^semver_prerelease_sequence_exhausted') { return 'semver_prerelease_sequence_exhausted' } if ($message -match '^release_watch_failed|^release_watch_not_success') { return 'release_dispatch_watch_failed' } @@ -732,6 +873,69 @@ function Resolve-PromotedTargetSemVer { throw "unsupported_target_channel: $TargetChannel" } +function Resolve-StablePromotionWindowDecision { + param( + [Parameter(Mandatory = $true)][DateTimeOffset]$NowUtc, + [Parameter(Mandatory = $true)][bool]$OverrideRequested, + [Parameter()][string]$OverrideReason = '' + ) + + $allowedWeekdays = @($script:stablePromotionFullCycleAllowedUtcWeekdays | ForEach-Object { ([string]$_).Trim() } | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) + if (@($allowedWeekdays).Count -eq 0) { + $allowedWeekdays = @('Monday') + } + + $currentWeekday = $NowUtc.ToUniversalTime().DayOfWeek.ToString() + $withinWindow = (@($allowedWeekdays | Where-Object { [string]$_ -eq $currentWeekday }).Count -gt 0) + $normalizedReason = ([string]$OverrideReason).Trim() + + $decision = [ordered]@{ + status = 'evaluated' + policy_source = [string]$script:stablePromotionWindowPolicySource + current_utc = $NowUtc.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + current_utc_weekday = $currentWeekday + full_cycle_allowed_utc_weekdays = @($allowedWeekdays) + within_window = [bool]$withinWindow + override_requested = [bool]$OverrideRequested + override_applied = $false + allow_outside_window_with_override = [bool]$script:stablePromotionAllowOutsideWindowWithOverride + override_reason_required = [bool]$script:stablePromotionOverrideReasonRequired + override_reason_min_length = [int]$script:stablePromotionOverrideReasonMinLength + override_reason = $normalizedReason + can_promote = $false + reason_code = '' + } + + if ($withinWindow) { + $decision.can_promote = 
$true + $decision.reason_code = 'stable_window_open' + return $decision + } + + if (-not $OverrideRequested) { + $decision.can_promote = $false + $decision.reason_code = 'stable_window_closed' + return $decision + } + + if (-not [bool]$script:stablePromotionAllowOutsideWindowWithOverride) { + throw "stable_window_override_blocked: current_utc_weekday=$currentWeekday" + } + + if ([bool]$script:stablePromotionOverrideReasonRequired -and [string]::IsNullOrWhiteSpace($normalizedReason)) { + throw "stable_window_override_reason_required: min_length=$([int]$script:stablePromotionOverrideReasonMinLength)" + } + + if ([int]$script:stablePromotionOverrideReasonMinLength -gt 0 -and $normalizedReason.Length -lt [int]$script:stablePromotionOverrideReasonMinLength) { + throw "stable_window_override_reason_too_short: min_length=$([int]$script:stablePromotionOverrideReasonMinLength) actual_length=$($normalizedReason.Length)" + } + + $decision.can_promote = $true + $decision.override_applied = $true + $decision.reason_code = 'stable_window_override_applied' + return $decision +} + function Invoke-ReleaseMode { param( [Parameter(Mandatory = $true)][string]$ModeName, @@ -956,6 +1160,19 @@ $report = [ordered]@{ semver_policy_source = $script:semverPolicySource semver_only_enforce_utc = $script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ') semver_only_enforced = [bool]$script:semverOnlyEnforced + stable_promotion_window = [ordered]@{ + policy_source = [string]$script:stablePromotionWindowPolicySource + full_cycle_allowed_utc_weekdays = @($script:stablePromotionFullCycleAllowedUtcWeekdays) + allow_outside_window_with_override = [bool]$script:stablePromotionAllowOutsideWindowWithOverride + override_reason_required = [bool]$script:stablePromotionOverrideReasonRequired + override_reason_min_length = [int]$script:stablePromotionOverrideReasonMinLength + override_requested = [bool]$ForceStablePromotionOutsideWindow + override_reason = ([string]$ForceStablePromotionReason).Trim() + 
decision = [ordered]@{ + status = 'skipped' + reason_code = 'not_full_cycle_mode' + } + } status = 'fail' reason_code = '' message = '' @@ -1027,17 +1244,24 @@ try { $prereleaseExec = Invoke-ReleaseMode -ModeName 'PromotePrerelease' -DateKey $dateKey -ScratchRoot $scratchRoot [void]$executionList.Add($prereleaseExec) + $stableWindowDecision = Resolve-StablePromotionWindowDecision ` + -NowUtc ([DateTimeOffset]::UtcNow) ` + -OverrideRequested ([bool]$ForceStablePromotionOutsideWindow) ` + -OverrideReason ([string]$ForceStablePromotionReason) + $report.stable_promotion_window.decision = $stableWindowDecision + $stableExec = [ordered]@{ target_release = [ordered]@{ mode = 'PromoteStable' status = 'skipped' - reason_code = 'stable_window_closed' + reason_code = [string]$stableWindowDecision.reason_code tag_family = 'semver' } + stable_window_gate = $stableWindowDecision } - $dayOfWeekUtc = (Get-Date).ToUniversalTime().DayOfWeek.ToString() - if ($dayOfWeekUtc -eq 'Monday') { + if ([bool]$stableWindowDecision.can_promote) { $stableExec = Invoke-ReleaseMode -ModeName 'PromoteStable' -DateKey $dateKey -ScratchRoot $scratchRoot + $stableExec.stable_window_gate = $stableWindowDecision } [void]$executionList.Add($stableExec) } else { diff --git a/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 index ba1fa98..a1812db 100644 --- a/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 +++ b/scripts/Invoke-ReleaseControlPlaneLocalDocker.ps1 @@ -30,6 +30,12 @@ param( [Parameter()] [switch]$DryRun, + [Parameter()] + [bool]$ForceStablePromotionOutsideWindow = $false, + + [Parameter()] + [string]$ForceStablePromotionReason = '', + [Parameter()] [switch]$AllowMutatingModes, @@ -74,6 +80,14 @@ if ($RunContractTests) { if ($DryRun) { $scriptArgs += '-DryRun' } +if ($ForceStablePromotionOutsideWindow) { + $scriptArgs += '-ForceStablePromotionOutsideWindow' + $scriptArgs += 'true' +} +if (-not 
[string]::IsNullOrWhiteSpace([string]$ForceStablePromotionReason)) { + $scriptArgs += '-ForceStablePromotionReason' + $scriptArgs += [string]$ForceStablePromotionReason +} if ($AllowMutatingModes) { $scriptArgs += '-AllowMutatingModes' } diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 8958e04..b9f2f4e 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -181,6 +181,10 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_matches_signature_grace_end' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq ([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) -Detail ("semver_only_enforce_utc={0}; signature_grace_end_utc={1}" -f [string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc, [string]$releaseClient.signature_policy.grace_end_utc) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_weekday_monday' -Passed (@($releaseClient.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays) -contains 'Monday') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_allow_override' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_required' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_min_length' -Passed ([int]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index c6dec73..24091f0 100644 --- 
a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -105,6 +105,10 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Name 'ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) Add-Check -Name 'ops_policy_tag_strategy_matches_signature_grace_end' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq ([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) -Detail ("semver_only_enforce_utc={0}; signature_grace_end_utc={1}" -f [string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc, [string]$releaseClient.signature_policy.grace_end_utc) + Add-Check -Name 'ops_policy_stable_window_full_cycle_weekday_monday' -Passed (@($releaseClient.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays) -contains 'Monday') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays))) + Add-Check -Name 'ops_policy_stable_window_allow_override' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) + Add-Check -Name 'ops_policy_stable_window_reason_required' -Passed 
([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) + Add-Check -Name 'ops_policy_stable_window_reason_min_length' -Passed ([int]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length) Add-Check -Name 'ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Name 'ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Name 'ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index 0a226bd..3b1c8af 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -100,6 +100,15 @@ try { if (-not $selfHealingPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_self_healing_missing' } + + $stableWindowPresent = ($null -ne $releaseClient.ops_control_plane_policy.stable_promotion_window) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_stable_window_present' + passed = $stableWindowPresent + }) | Out-Null + if (-not $stableWindowPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_stable_window_missing' + } } } } diff 
--git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 1c9aadf..4e0574c 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -40,5 +40,6 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'runtime_images_missing' $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' $script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_missing' } } diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index d0141a7..2d6cdf3 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -64,6 +64,10 @@ Describe 'Release client policy contract' { $releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family | Should -Be 'legacy_date_window' ([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' ([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be (([DateTime]$releaseClient.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) + @($releaseClient.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays) | Should -Contain 'Monday' + $releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override | Should -BeTrue + $releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required | Should -BeTrue + $releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length | Should -Be 12 @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should 
-Contain 'Ops SLO Gate Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' @@ -98,6 +102,7 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' + $script:policyScriptContent | Should -Match 'ops_policy_stable_window_full_cycle_weekday_monday' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_enabled' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_rollback_workflow' $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index e53c88d..a7206e5 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -27,6 +27,8 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'FullCycle' $script:workflowContent | Should -Match 'auto_remediate' $script:workflowContent | Should -Match 'keep_latest_canary_n' + $script:workflowContent | Should -Match 'force_stable_promotion_outside_window' + $script:workflowContent | Should -Match 'force_stable_promotion_reason' $script:workflowContent | Should -Match 'dry_run' } @@ -50,6 +52,8 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'Resolve-CanaryTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-PromotedTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' + $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowPolicy' + 
$script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowDecision' $script:runtimeContent | Should -Match 'Resolve-ControlPlaneFailureReasonCode' $script:runtimeContent | Should -Match 'Verify-DispatchedRelease' $script:runtimeContent | Should -Match 'Verify-PromotionLineage' @@ -66,6 +70,9 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' $script:runtimeContent | Should -Match 'promotion_source_not_at_head' + $script:runtimeContent | Should -Match 'stable_window_closed' + $script:runtimeContent | Should -Match 'stable_window_override_applied' + $script:runtimeContent | Should -Match 'stable_window_override_invalid' $script:runtimeContent | Should -Match 'stable_already_published' $script:runtimeContent | Should -Match '\[tag_migration_warning\]' $script:runtimeContent | Should -Match "tag_family = 'semver'" diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index fdd8df0..28e851f 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -34,8 +34,10 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match '20260226' $script:runbookContent | Should -Match 'release_verification_failed' $script:runbookContent | Should -Match 'promotion_lineage_invalid' + $script:runbookContent | Should -Match 'stable_window_override_invalid' $script:runbookContent | Should -Match 'release-manifest\.json' $script:runbookContent | Should -Match 'release_dispatch_watch_failed' + $script:runbookContent | Should -Match 'force_stable_promotion_outside_window=true' } It 'keeps README and AGENTS aligned to Scope A workflows' { diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 505c433..92feb92 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ 
b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -283,6 +283,10 @@ Describe 'Workspace surface contract' { $script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.legacy_tag_family | Should -Be 'legacy_date_window' ([DateTime]$script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be '2026-07-01T00:00:00Z' ([DateTime]$script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') | Should -Be (([DateTime]$script:manifest.installer_contract.release_client.signature_policy.grace_end_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')) + @($script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.full_cycle_allowed_utc_weekdays) | Should -Contain 'Monday' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_required | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_min_length | Should -Be 12 (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json 
b/workspace-governance-payload/workspace-governance/workspace-governance.json index 9e9161f..e79974a 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -281,6 +281,14 @@ "legacy_tag_family": "legacy_date_window", "semver_only_enforce_utc": "2026-07-01T00:00:00Z" }, + "stable_promotion_window": { + "full_cycle_allowed_utc_weekdays": [ + "Monday" + ], + "allow_outside_window_with_override": true, + "override_reason_required": true, + "override_reason_min_length": 12 + }, "self_healing": { "enabled": true, "max_attempts": 1, diff --git a/workspace-governance.json b/workspace-governance.json index 9e9161f..e79974a 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -281,6 +281,14 @@ "legacy_tag_family": "legacy_date_window", "semver_only_enforce_utc": "2026-07-01T00:00:00Z" }, + "stable_promotion_window": { + "full_cycle_allowed_utc_weekdays": [ + "Monday" + ], + "allow_outside_window_with_override": true, + "override_reason_required": true, + "override_reason_min_length": 12 + }, "self_healing": { "enabled": true, "max_attempts": 1, From 7b58e38f401129d4bfadd5ecf4b5fc788fc75565 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 22:29:43 -0800 Subject: [PATCH 40/60] Harden stable override governance and audit artifacts --- .github/workflows/release-control-plane.yml | 59 ++++++ AGENTS.md | 7 + README.md | 8 +- .../runbooks/release-ops-incident-response.md | 7 +- scripts/Exercise-ReleaseControlPlaneLocal.ps1 | 3 + scripts/Invoke-ReleaseControlPlane.ps1 | 200 ++++++++++++++++++ scripts/Test-PolicyContracts.ps1 | 4 + scripts/Test-ReleaseClientContracts.ps1 | 4 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 18 ++ .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 2 + tests/ReleaseClientPolicyContract.Tests.ps1 | 5 + ...lPlaneLocalDockerHarnessContract.Tests.ps1 | 1 + ...easeControlPlaneWorkflowContract.Tests.ps1 | 
4 + tests/ScopeAOpsRunbookContract.Tests.ps1 | 3 + tests/WorkspaceSurfaceContract.Tests.ps1 | 3 + .../workspace-governance.json | 4 +- workspace-governance.json | 4 +- 17 files changed, 330 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 89d16c3..6c56e06 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -76,6 +76,7 @@ jobs: run: | $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + $overrideAuditReportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-override-audit.json' $mode = [string]'${{ inputs.mode }}' if ([string]::IsNullOrWhiteSpace($mode)) { @@ -135,6 +136,7 @@ jobs: -ForceStablePromotionOutsideWindow:$forceStablePromotionOutsideWindow ` -ForceStablePromotionReason $forceStablePromotionReason ` -DryRun:$dryRun ` + -OverrideAuditOutputPath $overrideAuditReportPath ` -OutputPath $reportPath - name: Upload release control plane report @@ -145,6 +147,63 @@ jobs: path: ${{ runner.temp }}/release-control-plane-report.json if-no-files-found: error + - name: Upload release control plane stable override audit + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-control-plane-override-audit-${{ github.run_id }} + path: ${{ runner.temp }}/release-control-plane-override-audit.json + if-no-files-found: error + + - name: Open stable override incident issue when out-of-window override is applied + if: always() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + $auditPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-override-audit.json' + if (-not (Test-Path -LiteralPath $reportPath 
-PathType Leaf)) { + throw "Release control plane report missing: $reportPath" + } + if (-not (Test-Path -LiteralPath $auditPath -PathType Leaf)) { + throw "Release control plane override audit report missing: $auditPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $audit = Get-Content -LiteralPath $auditPath -Raw | ConvertFrom-Json -ErrorAction Stop + $decisionReason = [string]$report.stable_promotion_window.decision.reason_code + if ($decisionReason -ne 'stable_window_override_applied') { + Write-Host "No out-of-window stable override applied. decision_reason_code='$decisionReason'" + exit 0 + } + + $allowedWeekdays = [string]::Join(',', @($report.stable_promotion_window.full_cycle_allowed_utc_weekdays)) + $title = 'Release Control Plane Stable Override Alert' + $body = @" + Out-of-window stable override was applied. + + - Run: $env:RUN_URL + - Mode: $($report.mode) + - Decision: $decisionReason + - Current UTC weekday: $($report.stable_promotion_window.decision.current_utc_weekday) + - Allowed UTC weekdays: $allowedWeekdays + - Override reference: $($audit.override_reference) + - Override reason: $($report.stable_promotion_window.override_reason) + - Stable target tag: $($audit.stable_target_tag) + - Stable release: $($audit.stable_release_url) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + - name: Update release control plane incident issue on failure if: failure() shell: pwsh diff --git a/AGENTS.md b/AGENTS.md index a5d754f..7e395e2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -228,10 +228,15 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `allow_outside_window_with_override` - `override_reason_required` - `override_reason_min_length` + - `override_reason_pattern` + - `override_reason_example` - Full-cycle stable promotion must 
evaluate stable window policy and record deterministic decision codes (`stable_window_open`, `stable_window_closed`, `stable_window_override_applied`) in execution report metadata. - Emergency stable override is workflow-dispatch only and audited: - `force_stable_promotion_outside_window` - `force_stable_promotion_reason` +- Out-of-window stable override reason must satisfy policy regex and include structured reference (`?<reference>`) plus summary (`?<summary>`). +- Release-control-plane workflow must always publish `release-control-plane-override-audit.json`. +- Release-control-plane workflow must auto-open incident title `Release Control Plane Stable Override Alert` when out-of-window stable override is applied. - Release-control-plane canary hygiene invocation must enforce `TagFamily=semver`. - Legacy date-window tags (`v0.YYYYMMDD.N`) may still exist during migration but are non-canonical for control-plane dispatch. - Control-plane tag strategy policy must define `ops_control_plane_policy.tag_strategy.semver_only_enforce_utc` (default `2026-07-01T00:00:00Z`) and keep it aligned with signature grace-end during dual-mode transition. @@ -280,6 +285,8 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_stable_window_missing` + - `ops_control_plane_stable_window_reason_pattern_missing` + - `ops_control_plane_stable_window_reason_example_missing` - `policy_drift_runtime_error` - `.github/workflows/release-rollback-drill.yml` must run `scripts/Invoke-RollbackDrillSelfHealing.ps1`. - Rollback self-healing reason codes must remain explicit: diff --git a/README.md b/README.md index f3e7734..8b1299d 100644 --- a/README.md +++ b/README.md @@ -400,8 +400,10 @@ Control-plane behavior: 8. 
Reads stable promotion window policy from `installer_contract.release_client.ops_control_plane_policy.stable_promotion_window` (default: full-cycle Mondays only, override allowed with audited reason). 9. Supports manual emergency override for FullCycle stable promotion via workflow_dispatch inputs: - `force_stable_promotion_outside_window=true` - - `force_stable_promotion_reason=` -10. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. + - `force_stable_promotion_reason=` +10. Emits explicit override audit artifact `release-control-plane-override-audit.json` for every run. +11. Auto-opens incident title `Release Control Plane Stable Override Alert` whenever decision code is `stable_window_override_applied`. +12. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. Top-level release-control-plane deterministic failure reason codes include: - `ops_health_gate_failed` @@ -447,6 +449,8 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_stable_window_missing` + - `ops_control_plane_stable_window_reason_pattern_missing` + - `ops_control_plane_stable_window_reason_example_missing` `release-rollback-drill.yml` is scheduled daily and supports manual dispatch. 
It runs `scripts/Invoke-RollbackDrillSelfHealing.ps1` to validate deterministic rollback readiness: - channel-scoped latest/previous release candidates diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index e126195..0e19fac 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -28,7 +28,7 @@ Reason code mapping: - `sync_guard_missing`: no sync-guard run found for branch. - `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. - `promotion_lineage_invalid`: promotion source/target channel, SemVer core, or commit-SHA lineage check failed. -- `stable_window_override_invalid`: requested stable override violated stable window policy (override disabled, missing reason, or reason too short). +- `stable_window_override_invalid`: requested stable override violated stable window policy (override disabled, missing reason, reason too short, or reason format mismatch). - `release_dispatch_watch_failed`: release workflow dispatch completed but run conclusion was not `success`. - `release_verification_failed`: post-dispatch release verification failed (missing assets or invalid `release-manifest.json` metadata). - `canary_hygiene_failed`: SemVer canary retention cleanup failed after publish. @@ -127,11 +127,13 @@ Force stable promotion outside window (audited emergency path): gh workflow run release-control-plane.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` -f mode=FullCycle ` -f force_stable_promotion_outside_window=true ` - -f force_stable_promotion_reason="Emergency promotion after incident remediation" ` + -f force_stable_promotion_reason="CHG-1234: emergency promotion after incident remediation" ` -f auto_remediate=true ` -f dry_run=false ``` +Out-of-window override automatically opens incident title `Release Control Plane Stable Override Alert` and uploads `release-control-plane-override-audit.json`. 
+ Run validation-only health/policy gate: ```powershell @@ -235,5 +237,6 @@ gh workflow run release-rollback-drill.yml -R LabVIEW-Community-CI-CD/labview-cd ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` +- `release-control-plane-override-audit.json` (when override is requested/applied) - sync guard run URL - parity SHAs (upstream and fork) diff --git a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 index a857931..b6b6421 100644 --- a/scripts/Exercise-ReleaseControlPlaneLocal.ps1 +++ b/scripts/Exercise-ReleaseControlPlaneLocal.ps1 @@ -131,6 +131,7 @@ try { } $controlPlanePath = Join-Path $resolvedOutputRoot 'release-control-plane-report.json' + $controlPlaneOverrideAuditPath = Join-Path $resolvedOutputRoot 'release-control-plane-override-audit.json' & pwsh -NoProfile -File $controlPlaneScript ` -Repository $Repository ` -Branch $Branch ` @@ -141,11 +142,13 @@ try { -ForceStablePromotionOutsideWindow:$ForceStablePromotionOutsideWindow ` -ForceStablePromotionReason $ForceStablePromotionReason ` -DryRun:$DryRun ` + -OverrideAuditOutputPath $controlPlaneOverrideAuditPath ` -OutputPath $controlPlanePath if ($LASTEXITCODE -ne 0) { throw "release_control_plane_failed: exit_code=$LASTEXITCODE" } Add-StepResult -Name 'release_control_plane' -Status 'pass' -OutputPath $controlPlanePath + Add-StepResult -Name 'release_control_plane_override_audit' -Status 'pass' -OutputPath $controlPlaneOverrideAuditPath $sloPath = Join-Path $resolvedOutputRoot 'weekly-ops-slo-report.json' & pwsh -NoProfile -File $sloScript ` diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 9bb5d23..b5169cd 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -41,6 +41,9 @@ param( [Parameter()] [string]$ForceStablePromotionReason = '', + [Parameter()] + [string]$OverrideAuditOutputPath = '', + [Parameter()] 
[string]$OutputPath = '' ) @@ -147,6 +150,8 @@ function Resolve-StablePromotionWindowPolicy { allow_outside_window_with_override = $true override_reason_required = $true override_reason_min_length = 12 + override_reason_pattern = '^(?(?i:(?:CHG|INC|RFC|PR|TASK)-\d{3,}|#\d+))\s*[:\-]\s*(?.+\S)$' + override_reason_example = 'CHG-1234: emergency stable promotion after incident remediation' source = 'default' warnings = @() } @@ -251,6 +256,25 @@ function Resolve-StablePromotionWindowPolicy { } else { [void]$warnings.Add('stable_promotion_window_reason_min_length_missing') } + + $reasonPattern = [string]$candidateWindow.override_reason_pattern + if ([string]::IsNullOrWhiteSpace($reasonPattern)) { + [void]$warnings.Add('stable_promotion_window_reason_pattern_missing') + } else { + try { + [void][regex]::new($reasonPattern) + $policy.override_reason_pattern = $reasonPattern + } catch { + [void]$warnings.Add("stable_promotion_window_reason_pattern_invalid: value=$reasonPattern") + } + } + + $reasonExample = [string]$candidateWindow.override_reason_example + if ([string]::IsNullOrWhiteSpace($reasonExample)) { + [void]$warnings.Add('stable_promotion_window_reason_example_missing') + } else { + $policy.override_reason_example = $reasonExample.Trim() + } } catch { [void]$warnings.Add("stable_promotion_window_policy_load_failed: $([string]$_.Exception.Message)") } @@ -275,6 +299,8 @@ $script:stablePromotionFullCycleAllowedUtcWeekdays = @($stablePromotionWindowPol $script:stablePromotionAllowOutsideWindowWithOverride = [bool]$stablePromotionWindowPolicy.allow_outside_window_with_override $script:stablePromotionOverrideReasonRequired = [bool]$stablePromotionWindowPolicy.override_reason_required $script:stablePromotionOverrideReasonMinLength = [int]$stablePromotionWindowPolicy.override_reason_min_length +$script:stablePromotionOverrideReasonPattern = [string]$stablePromotionWindowPolicy.override_reason_pattern +$script:stablePromotionOverrideReasonExample = 
[string]$stablePromotionWindowPolicy.override_reason_example foreach ($warning in @($stablePromotionWindowPolicy.warnings)) { Write-Warning "[stable_promotion_window_policy_warning] $warning" } @@ -901,7 +927,12 @@ function Resolve-StablePromotionWindowDecision { allow_outside_window_with_override = [bool]$script:stablePromotionAllowOutsideWindowWithOverride override_reason_required = [bool]$script:stablePromotionOverrideReasonRequired override_reason_min_length = [int]$script:stablePromotionOverrideReasonMinLength + override_reason_pattern = [string]$script:stablePromotionOverrideReasonPattern + override_reason_example = [string]$script:stablePromotionOverrideReasonExample override_reason = $normalizedReason + override_reference = '' + override_summary = '' + structured_reason_valid = $false can_promote = $false reason_code = '' } @@ -930,12 +961,174 @@ function Resolve-StablePromotionWindowDecision { throw "stable_window_override_reason_too_short: min_length=$([int]$script:stablePromotionOverrideReasonMinLength) actual_length=$($normalizedReason.Length)" } + $reasonPattern = ([string]$script:stablePromotionOverrideReasonPattern).Trim() + if ([string]::IsNullOrWhiteSpace($reasonPattern)) { + throw 'stable_window_override_reason_pattern_missing' + } + + $reasonMatch = $null + try { + $reasonMatch = [regex]::Match($normalizedReason, $reasonPattern, [System.Text.RegularExpressions.RegexOptions]::CultureInvariant) + } catch { + throw "stable_window_override_reason_pattern_invalid: pattern=$reasonPattern" + } + + if ($null -eq $reasonMatch -or -not $reasonMatch.Success) { + throw "stable_window_override_reason_format_invalid: expected_pattern=$reasonPattern" + } + + $referenceGroup = $reasonMatch.Groups['reference'] + $summaryGroup = $reasonMatch.Groups['summary'] + $overrideReference = if ($null -ne $referenceGroup -and $referenceGroup.Success) { ([string]$referenceGroup.Value).Trim() } else { '' } + $overrideSummary = if ($null -ne $summaryGroup -and 
$summaryGroup.Success) { ([string]$summaryGroup.Value).Trim() } else { '' } + if ([string]::IsNullOrWhiteSpace($overrideReference)) { + throw 'stable_window_override_reason_reference_missing' + } + if ([string]::IsNullOrWhiteSpace($overrideSummary)) { + throw 'stable_window_override_reason_summary_missing' + } + $decision.can_promote = $true $decision.override_applied = $true + $decision.override_reference = $overrideReference + $decision.override_summary = $overrideSummary + $decision.structured_reason_valid = $true $decision.reason_code = 'stable_window_override_applied' return $decision } +function Write-StableOverrideAuditReport { + param( + [Parameter(Mandatory = $true)][object]$ControlPlaneReport, + [Parameter()][string]$OutputPath = '' + ) + + if ([string]::IsNullOrWhiteSpace([string]$OutputPath)) { + return + } + + $window = $ControlPlaneReport.stable_promotion_window + $decision = $null + if ($null -ne $window) { + $decision = $window.decision + } + + function Get-PropertyValueOrDefault { + param( + [Parameter()][AllowNull()]$Object, + [Parameter(Mandatory = $true)][string]$Name, + [Parameter()][AllowNull()]$DefaultValue = $null + ) + + if ($null -eq $Object) { + return $DefaultValue + } + + $prop = $Object.PSObject.Properties[$Name] + if ($null -eq $prop) { + return $DefaultValue + } + + return $prop.Value + } + + $stableExecution = @( + @($ControlPlaneReport.executions) | + Where-Object { [string]$_.mode -eq 'PromoteStable' } | + Select-Object -First 1 + ) + + $stableTargetTag = '' + $stableDispatchRunId = '' + $stableReleaseUrl = '' + if (@($stableExecution).Count -eq 1) { + if ($null -ne $stableExecution[0].target_release) { + $stableTargetTag = [string]$stableExecution[0].target_release.tag + } + if ($null -ne $stableExecution[0].dispatch) { + $stableDispatchRunId = [string]$stableExecution[0].dispatch.run_id + } + if ($null -ne $stableExecution[0].release_verification) { + $stableReleaseUrl = 
[string]$stableExecution[0].release_verification.release_url + } + } + + $overrideRequested = $false + $overrideApplied = $false + $structuredReasonValid = $false + $overrideReason = '' + $overrideReference = '' + $overrideSummary = '' + $policySource = '' + $decisionReason = '' + $currentUtc = '' + $currentUtcWeekday = '' + $allowedWeekdays = @() + $auditStatus = 'not_applicable' + $auditReason = 'not_full_cycle_mode' + + if ($null -ne $window) { + $overrideRequested = [bool](Get-PropertyValueOrDefault -Object $window -Name 'override_requested' -DefaultValue $false) + $overrideReason = [string](Get-PropertyValueOrDefault -Object $window -Name 'override_reason' -DefaultValue '') + $policySource = [string](Get-PropertyValueOrDefault -Object $window -Name 'policy_source' -DefaultValue '') + $allowedWeekdays = @((Get-PropertyValueOrDefault -Object $window -Name 'full_cycle_allowed_utc_weekdays' -DefaultValue @())) + } + + if ($null -ne $decision) { + $decisionReason = [string](Get-PropertyValueOrDefault -Object $decision -Name 'reason_code' -DefaultValue '') + $currentUtc = [string](Get-PropertyValueOrDefault -Object $decision -Name 'current_utc' -DefaultValue '') + $currentUtcWeekday = [string](Get-PropertyValueOrDefault -Object $decision -Name 'current_utc_weekday' -DefaultValue '') + $overrideApplied = [bool](Get-PropertyValueOrDefault -Object $decision -Name 'override_applied' -DefaultValue $false) + $overrideReference = [string](Get-PropertyValueOrDefault -Object $decision -Name 'override_reference' -DefaultValue '') + $overrideSummary = [string](Get-PropertyValueOrDefault -Object $decision -Name 'override_summary' -DefaultValue '') + $structuredReasonValid = [bool](Get-PropertyValueOrDefault -Object $decision -Name 'structured_reason_valid' -DefaultValue $false) + } + + if ([string]$decisionReason -eq 'stable_window_override_applied') { + $auditStatus = 'override_applied' + $auditReason = 'stable_window_override_applied' + } elseif ($overrideRequested) { + 
$auditStatus = 'override_requested_not_applied' + $auditReason = if ([string]::IsNullOrWhiteSpace($decisionReason)) { 'override_requested' } else { $decisionReason } + } elseif ([string]$ControlPlaneReport.mode -eq 'FullCycle') { + $auditStatus = 'window_default_path' + $auditReason = if ([string]::IsNullOrWhiteSpace($decisionReason)) { 'stable_window_not_evaluated' } else { $decisionReason } + } + + if ([string]$ControlPlaneReport.status -eq 'fail' -and [string]$ControlPlaneReport.reason_code -eq 'stable_window_override_invalid') { + $auditStatus = 'override_rejected' + $auditReason = 'stable_window_override_invalid' + } + + $auditReport = [ordered]@{ + schema_version = '1.0' + timestamp_utc = Get-UtcNowIso + repository = [string]$ControlPlaneReport.repository + branch = [string]$ControlPlaneReport.branch + mode = [string]$ControlPlaneReport.mode + run_status = [string]$ControlPlaneReport.status + run_reason_code = [string]$ControlPlaneReport.reason_code + status = $auditStatus + reason_code = $auditReason + stable_target_tag = $stableTargetTag + stable_dispatch_run_id = $stableDispatchRunId + stable_release_url = $stableReleaseUrl + override_requested = $overrideRequested + override_applied = $overrideApplied + override_reason = $overrideReason + override_reference = $overrideReference + override_summary = $overrideSummary + structured_reason_valid = $structuredReasonValid + policy_source = $policySource + full_cycle_allowed_utc_weekdays = @($allowedWeekdays) + current_utc = $currentUtc + current_utc_weekday = $currentUtcWeekday + decision_reason_code = $decisionReason + } + + Write-WorkflowOpsReport -Report $auditReport -OutputPath $OutputPath | Out-Null +} + function Invoke-ReleaseMode { param( [Parameter(Mandatory = $true)][string]$ModeName, @@ -1166,6 +1359,8 @@ $report = [ordered]@{ allow_outside_window_with_override = [bool]$script:stablePromotionAllowOutsideWindowWithOverride override_reason_required = [bool]$script:stablePromotionOverrideReasonRequired 
override_reason_min_length = [int]$script:stablePromotionOverrideReasonMinLength + override_reason_pattern = [string]$script:stablePromotionOverrideReasonPattern + override_reason_example = [string]$script:stablePromotionOverrideReasonExample override_requested = [bool]$ForceStablePromotionOutsideWindow override_reason = ([string]$ForceStablePromotionReason).Trim() decision = [ordered]@{ @@ -1283,6 +1478,11 @@ catch { } finally { Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + try { + Write-StableOverrideAuditReport -ControlPlaneReport $report -OutputPath $OverrideAuditOutputPath + } catch { + Write-Warning ("[stable_override_audit_warning] {0}" -f [string]$_.Exception.Message) + } if (Test-Path -LiteralPath $scratchRoot -PathType Container) { Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue } diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index b9f2f4e..491e135 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -185,6 +185,10 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_allow_override' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_required' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_min_length' -Passed ([int]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length -eq 12) -Detail 
([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_pattern_exists' -Passed (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern)) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_pattern_has_reference_group' -Passed ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern -match '\?') -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_pattern_has_summary_group' -Passed ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern -match '\?') -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_stable_window_reason_example' -Passed (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example)) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Scope 'manifest' -Name 
'release_client_ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 24091f0..d6119c8 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -109,6 +109,10 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_stable_window_allow_override' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override) Add-Check -Name 'ops_policy_stable_window_reason_required' -Passed ([bool]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required) Add-Check -Name 'ops_policy_stable_window_reason_min_length' -Passed ([int]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length) + Add-Check -Name 'ops_policy_stable_window_reason_pattern_exists' -Passed (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern)) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Name 'ops_policy_stable_window_reason_pattern_has_reference_group' -Passed ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern -match '\?<reference>') -Detail
([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Name 'ops_policy_stable_window_reason_pattern_has_summary_group' -Passed ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern -match '\?<summary>') -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) + Add-Check -Name 'ops_policy_stable_window_reason_example' -Passed (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example)) -Detail ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example) Add-Check -Name 'ops_policy_self_healing_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.enabled) Add-Check -Name 'ops_policy_self_healing_max_attempts' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.max_attempts -eq 1) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.max_attempts) Add-Check -Name 'ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index 3b1c8af..bbed36d 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -108,6 +108,24 @@ try { }) | Out-Null if (-not $stableWindowPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_stable_window_missing' + } else { + $stableWindowPatternPresent = (-not
[string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern)) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_stable_window_reason_pattern_present' + passed = $stableWindowPatternPresent + }) | Out-Null + if (-not $stableWindowPatternPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_stable_window_reason_pattern_missing' + } + + $stableWindowReasonExamplePresent = (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example)) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_stable_window_reason_example_present' + passed = $stableWindowReasonExamplePresent + }) | Out-Null + if (-not $stableWindowReasonExamplePresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_stable_window_reason_example_missing' + } } } } diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 4e0574c..b542ed7 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -41,5 +41,7 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' $script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_reason_pattern_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_reason_example_missing' } } diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 2d6cdf3..ab2f0e5 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -68,6 +68,9 @@ Describe 'Release client policy 
contract' { $releaseClient.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override | Should -BeTrue $releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_required | Should -BeTrue $releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_min_length | Should -Be 12 + ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) | Should -Match '\?<reference>' + ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) | Should -Match '\?<summary>' + ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example) | Should -Match '^CHG-' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops SLO Gate Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' @@ -103,6 +106,8 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_full_cycle_weekday_monday' + $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_pattern_exists' + $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_example' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_enabled' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_rollback_workflow' $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' diff --git a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 index fbeaab2..bbe83e8 100644 ---
a/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneLocalDockerHarnessContract.Tests.ps1 @@ -36,6 +36,7 @@ Describe 'Release control plane local Docker harness contract' { $script:harnessContent | Should -Match 'RequiredRunnerLabelsCsv \$releaseRunnerLabelsCsv' $script:harnessContent | Should -Match "self-hosted', 'windows', 'self-hosted-windows-lv" $script:harnessContent | Should -Match 'release-control-plane-local-summary\.json' + $script:harnessContent | Should -Match 'release-control-plane-override-audit\.json' } It 'guards mutating modes unless explicitly allowed' { diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index a7206e5..2eed1cd 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -40,6 +40,8 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'release-control-plane-report\.json' + $script:workflowContent | Should -Match 'release-control-plane-override-audit\.json' + $script:workflowContent | Should -Match 'Release Control Plane Stable Override Alert' $script:workflowContent | Should -Match 'Release Control Plane Alert' $script:workflowContent | Should -Match '-Mode Fail' $script:workflowContent | Should -Match '-Mode Recover' @@ -54,6 +56,7 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowPolicy' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowDecision' + $script:runtimeContent | Should -Match 'Write-StableOverrideAuditReport' $script:runtimeContent | Should -Match 'Resolve-ControlPlaneFailureReasonCode' 
$script:runtimeContent | Should -Match 'Verify-DispatchedRelease' $script:runtimeContent | Should -Match 'Verify-PromotionLineage' @@ -73,6 +76,7 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'stable_window_closed' $script:runtimeContent | Should -Match 'stable_window_override_applied' $script:runtimeContent | Should -Match 'stable_window_override_invalid' + $script:runtimeContent | Should -Match 'stable_window_override_reason_format_invalid' $script:runtimeContent | Should -Match 'stable_already_published' $script:runtimeContent | Should -Match '\[tag_migration_warning\]' $script:runtimeContent | Should -Match "tag_family = 'semver'" diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index 28e851f..d848d71 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -38,6 +38,9 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'release-manifest\.json' $script:runbookContent | Should -Match 'release_dispatch_watch_failed' $script:runbookContent | Should -Match 'force_stable_promotion_outside_window=true' + $script:runbookContent | Should -Match 'CHG-1234' + $script:runbookContent | Should -Match 'Release Control Plane Stable Override Alert' + $script:runbookContent | Should -Match 'release-control-plane-override-audit\.json' } It 'keeps README and AGENTS aligned to Scope A workflows' { diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 92feb92..1efaac6 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -287,6 +287,9 @@ Describe 'Workspace surface contract' { $script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.allow_outside_window_with_override | Should -BeTrue 
$script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_required | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_min_length | Should -Be 12 + ([string]$script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) | Should -Match '\?<reference>' + ([string]$script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_pattern) | Should -Match '\?<summary>' + ([string]$script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_example) | Should -Match '^CHG-' (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index e79974a..0832bd1 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -287,7 +287,9 @@ ], "allow_outside_window_with_override": true, "override_reason_required": true, - "override_reason_min_length": 12 + "override_reason_min_length": 12, + "override_reason_pattern": "^(?<reference>(?i:(?:CHG|INC|RFC|PR|TASK)-\\d{3,}|#\\d+))\\s*[:\\-]\\s*(?<summary>.+\\S)$", + "override_reason_example": "CHG-1234: emergency stable promotion after incident remediation" }, "self_healing": { "enabled":
true, diff --git a/workspace-governance.json b/workspace-governance.json index e79974a..0832bd1 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -287,7 +287,9 @@ ], "allow_outside_window_with_override": true, "override_reason_required": true, - "override_reason_min_length": 12 + "override_reason_min_length": 12, + "override_reason_pattern": "^(?<reference>(?i:(?:CHG|INC|RFC|PR|TASK)-\\d{3,}|#\\d+))\\s*[:\\-]\\s*(?<summary>.+\\S)$", + "override_reason_example": "CHG-1234: emergency stable promotion after incident remediation" }, "self_healing": { "enabled": true, From 00fdbacb77b8c3e310b165c3b1b5e9b01462e61a Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 22:49:19 -0800 Subject: [PATCH 41/60] Fix stable override audit extraction for hashtable reports --- scripts/Invoke-ReleaseControlPlane.ps1 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index b5169cd..45830e3 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -1024,6 +1024,13 @@ function Write-StableOverrideAuditReport { return $DefaultValue } + if ($Object -is [System.Collections.IDictionary]) { + if ($Object.Contains($Name)) { + return $Object[$Name] + } + return $DefaultValue + } + $prop = $Object.PSObject.Properties[$Name] if ($null -eq $prop) { return $DefaultValue From eb5117e6572eccc19eb82edff46e2c6dbf1d3533 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 23:08:54 -0800 Subject: [PATCH 42/60] Harden release concurrency and SemVer collision retries --- .github/workflows/release-control-plane.yml | 4 + .../workflows/release-workspace-installer.yml | 4 + scripts/Invoke-ReleaseControlPlane.ps1 | 343 ++++++++++++++---- ...easeControlPlaneWorkflowContract.Tests.ps1 | 9 + ...orkspaceInstallerReleaseContract.Tests.ps1 | 3 + 5 files changed, 296 insertions(+), 67 deletions(-) diff --git a/.github/workflows/release-control-plane.yml
b/.github/workflows/release-control-plane.yml index 6c56e06..6a03143 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -52,6 +52,10 @@ permissions: actions: write issues: write +concurrency: + group: release-control-plane-${{ github.repository }}-${{ github.ref_name }} + cancel-in-progress: false + jobs: release-control-plane: name: Release Control Plane diff --git a/.github/workflows/release-workspace-installer.yml b/.github/workflows/release-workspace-installer.yml index 2bd894d..4a83f39 100644 --- a/.github/workflows/release-workspace-installer.yml +++ b/.github/workflows/release-workspace-installer.yml @@ -26,6 +26,10 @@ on: permissions: contents: write +concurrency: + group: release-workspace-installer-${{ github.repository }}-${{ inputs.release_tag }} + cancel-in-progress: false + jobs: release-workspace-installer: name: Release Workspace Installer diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 45830e3..622c60b 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -338,6 +338,8 @@ function Resolve-ControlPlaneFailureReasonCode { if ($message -match '^stable_window_override_') { return 'stable_window_override_invalid' } if ($message -match '^branch_head_unresolved') { return 'branch_head_unresolved' } if ($message -match '^semver_prerelease_sequence_exhausted') { return 'semver_prerelease_sequence_exhausted' } + if ($message -match '^release_tag_collision_retry_exhausted') { return 'release_tag_collision_retry_exhausted' } + if ($message -match '^release_dispatch_attempts_exhausted') { return 'release_dispatch_attempts_exhausted' } if ($message -match '^release_watch_failed|^release_watch_not_success') { return 'release_dispatch_watch_failed' } if ($message -match '^release_verification_') { return 'release_verification_failed' } if ($message -match '^canary_hygiene_failed') { return 
'canary_hygiene_failed' } @@ -899,6 +901,86 @@ function Resolve-PromotedTargetSemVer { throw "unsupported_target_channel: $TargetChannel" } +function Get-ReleasePlanningState { + param( + [Parameter(Mandatory = $true)][string]$Repository + ) + + $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) + $allRecords = @( + $releaseList | + ForEach-Object { Convert-ReleaseToRecord -Release $_ } | + Where-Object { $null -ne $_ } + ) + $legacyRecords = @( + $allRecords | + Where-Object { [string]$_.tag_family -eq 'legacy_date_window' -and [string]$_.channel -ne 'unknown' } + ) + + $migrationWarnings = @() + if (@($legacyRecords).Count -gt 0) { + if ($script:semverOnlyEnforced) { + throw "semver_only_enforcement_violation: semver_only_enforce_utc=$($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ')) legacy_tag_count=$(@($legacyRecords).Count)" + } + $migrationWarnings += "Legacy date-window release tags remain present in '$Repository'. Control-plane dispatch now targets SemVer channel tags and legacy compatibility ends at $($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ'))." 
+ } + + return [ordered]@{ + records = @($allRecords) + migration_warnings = @($migrationWarnings) + } +} + +function Resolve-TargetPlanForMode { + param( + [Parameter(Mandatory = $true)][string]$ModeName, + [Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @(), + [Parameter(Mandatory = $true)]$ModeConfig, + [Parameter()][AllowNull()]$SourceCore = $null + ) + + if ($ModeName -eq 'CanaryCycle') { + return Resolve-CanaryTargetSemVer -Records $Records + } + + if ($ModeName -eq 'PromotePrerelease' -or $ModeName -eq 'PromoteStable') { + if ($null -eq $SourceCore) { + throw "promotion_source_missing: channel=$([string]$ModeConfig.source_channel_for_promotion) strategy=semver" + } + return Resolve-PromotedTargetSemVer -Records $Records -TargetChannel ([string]$ModeConfig.channel) -SourceCore $SourceCore + } + + throw "unsupported_release_mode: $ModeName" +} + +function Get-ReleaseByTagOrNull { + param( + [Parameter(Mandatory = $true)][string]$Repository, + [Parameter(Mandatory = $true)][string]$Tag + ) + + $viewOutput = & gh release view $Tag -R $Repository --json tagName,publishedAt,url 2>&1 + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + $viewText = if ($viewOutput -is [System.Array]) { + (($viewOutput | ForEach-Object { [string]$_ }) -join [Environment]::NewLine) + } else { + [string]$viewOutput + } + + if ($exitCode -eq 0) { + if ([string]::IsNullOrWhiteSpace($viewText)) { + throw ("gh_command_failed: exit={0} command=gh release view {1} -R {2} --json tagName,publishedAt,url" -f $exitCode, $Tag, $Repository) + } + return ($viewText | ConvertFrom-Json -ErrorAction Stop) + } + + if ($viewText -match '(?i)not found|http 404|release.*not found') { + return $null + } + + throw ("gh_command_failed: exit={0} command=gh release view {1} -R {2} --json tagName,publishedAt,url error={3}" -f $exitCode, $Tag, $Repository, ($viewText.Trim())) +} + function Resolve-StablePromotionWindowDecision { param( [Parameter(Mandatory = 
$true)][DateTimeOffset]$NowUtc, @@ -1154,21 +1236,9 @@ function Invoke-ReleaseMode { } $modeConfig = Get-ModeConfig -ModeName $ModeName - $releaseList = @(Get-GhReleasesPortable -Repository $Repository -Limit 100 -ExcludeDrafts) - $allRecords = @( - $releaseList | - ForEach-Object { Convert-ReleaseToRecord -Release $_ } | - Where-Object { $null -ne $_ } - ) - $legacyRecords = @($allRecords | Where-Object { [string]$_.tag_family -eq 'legacy_date_window' -and [string]$_.channel -ne 'unknown' }) - - $migrationWarnings = @() - if (@($legacyRecords).Count -gt 0) { - if ($script:semverOnlyEnforced) { - throw "semver_only_enforcement_violation: semver_only_enforce_utc=$($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ')) legacy_tag_count=$(@($legacyRecords).Count)" - } - $migrationWarnings += "Legacy date-window release tags remain present in '$Repository'. Control-plane dispatch now targets SemVer channel tags and legacy compatibility ends at $($script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ'))." 
- } + $planningState = Get-ReleasePlanningState -Repository $Repository + $allRecords = @($planningState.records) + $migrationWarnings = @($planningState.migration_warnings) $sourceRecord = $null $sourceCore = $null @@ -1224,17 +1294,7 @@ function Invoke-ReleaseMode { } } - $targetPlan = $null - if ($ModeName -eq 'CanaryCycle') { - $targetPlan = Resolve-CanaryTargetSemVer -Records $allRecords - } elseif ($ModeName -eq 'PromotePrerelease' -or $ModeName -eq 'PromoteStable') { - if ($null -eq $sourceCore) { - throw "promotion_source_missing: channel=$([string]$modeConfig.source_channel_for_promotion) strategy=semver" - } - $targetPlan = Resolve-PromotedTargetSemVer -Records $allRecords -TargetChannel ([string]$modeConfig.channel) -SourceCore $sourceCore - } else { - throw "unsupported_release_mode: $ModeName" - } + $targetPlan = Resolve-TargetPlanForMode -ModeName $ModeName -Records $allRecords -ModeConfig $modeConfig -SourceCore $sourceCore $targetTag = [string]$targetPlan.tag $targetCoreText = Format-CoreVersion -Core $targetPlan.core @@ -1249,6 +1309,10 @@ function Invoke-ReleaseMode { status = if ([bool]$targetPlan.skipped) { 'skipped' } else { 'planned' } reason_code = if ([bool]$targetPlan.skipped) { [string]$targetPlan.reason_code } else { '' } migration_warnings = @($migrationWarnings) + dispatch_retry_max_attempts = 4 + dispatch_attempts = 0 + collision_retries = 0 + dispatch_attempt_history = @() } if (@($migrationWarnings).Count -gt 0) { @@ -1268,56 +1332,201 @@ function Invoke-ReleaseMode { branch = $Branch run_id = '' url = '' + attempts = 0 + collision_retries = 0 } return $executionReport } - $dispatchReportPath = Join-Path $ScratchRoot "$ModeName-dispatch.json" - $dispatchInputs = @( - "release_tag=$targetTag", - 'allow_existing_tag=false', - "prerelease=$(([string]([bool]$modeConfig.prerelease)).ToLowerInvariant())", - "release_channel=$([string]$modeConfig.channel)" - ) - & $dispatchWorkflowScript ` - -Repository $Repository ` - -WorkflowFile 
$ReleaseWorkflowFile ` - -Branch $Branch ` - -Inputs $dispatchInputs ` - -OutputPath $dispatchReportPath | Out-Null - $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop - - $watchReportPath = Join-Path $ScratchRoot "$ModeName-watch.json" - & pwsh -NoProfile -File $watchWorkflowScript ` - -Repository $Repository ` - -RunId ([string]$dispatchReport.run_id) ` - -TimeoutMinutes $WatchTimeoutMinutes ` - -OutputPath $watchReportPath | Out-Null - if ($LASTEXITCODE -ne 0) { - throw "release_watch_failed: mode=$ModeName run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" - } - $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $dispatchRetryMaxAttempts = 4 + $dispatchAttempt = 0 + $collisionRetryCount = 0 + $attemptHistory = [System.Collections.Generic.List[object]]::new() + $dispatchRecord = $null + $releaseVerification = $null + + while ($dispatchAttempt -lt $dispatchRetryMaxAttempts) { + $dispatchAttempt++ + + if ($dispatchAttempt -gt 1) { + $planningState = Get-ReleasePlanningState -Repository $Repository + $allRecords = @($planningState.records) + $targetPlan = Resolve-TargetPlanForMode -ModeName $ModeName -Records $allRecords -ModeConfig $modeConfig -SourceCore $sourceCore + $targetTag = [string]$targetPlan.tag + $targetCoreText = Format-CoreVersion -Core $targetPlan.core + + $executionReport.target_release.tag = $targetTag + $executionReport.target_release.core = $targetCoreText + $executionReport.target_release.prerelease_sequence = [int]$targetPlan.prerelease_sequence + $executionReport.target_release.status = if ([bool]$targetPlan.skipped) { 'skipped' } else { 'planned' } + $executionReport.target_release.reason_code = if ([bool]$targetPlan.skipped) { [string]$targetPlan.reason_code } else { '' } + $executionReport.target_release.migration_warnings = @($planningState.migration_warnings) + } + + if ([bool]$targetPlan.skipped) { + if 
([string]$targetPlan.reason_code -eq 'stable_already_published') { + $releaseVerification = Verify-DispatchedRelease ` + -TargetTag $targetTag ` + -ExpectedChannel ([string]$modeConfig.channel) ` + -ExpectedIsPrerelease ([bool]$modeConfig.prerelease) ` + -ModeName $ModeName ` + -ScratchRoot $ScratchRoot + $dispatchRecord = [ordered]@{ + status = 'collision_resolved_existing_stable' + workflow = $ReleaseWorkflowFile + branch = $Branch + run_id = '' + url = [string]$releaseVerification.release_url + conclusion = 'success' + attempts = $dispatchAttempt + collision_retries = $collisionRetryCount + reason_code = 'stable_already_published' + } + [void]$attemptHistory.Add([ordered]@{ + attempt = $dispatchAttempt + tag = $targetTag + status = 'stable_already_published' + reason_code = 'stable_already_published' + }) + break + } + + throw "release_dispatch_attempts_exhausted: mode=$ModeName attempts=$dispatchAttempt tag=$targetTag reason=$([string]$targetPlan.reason_code)" + } + + $existingBeforeDispatch = Get-ReleaseByTagOrNull -Repository $Repository -Tag $targetTag + if ($null -ne $existingBeforeDispatch) { + $collisionRetryCount++ + Write-Warning ("[release_tag_collision] mode={0} attempt={1} tag={2} already exists at {3}. Replanning." 
-f $ModeName, $dispatchAttempt, $targetTag, [string]$existingBeforeDispatch.url) + [void]$attemptHistory.Add([ordered]@{ + attempt = $dispatchAttempt + tag = $targetTag + status = 'collision_pre_dispatch' + existing_release_url = [string]$existingBeforeDispatch.url + existing_release_published_at_utc = [string]$existingBeforeDispatch.publishedAt + }) + if ($dispatchAttempt -ge $dispatchRetryMaxAttempts) { + throw "release_tag_collision_retry_exhausted: mode=$ModeName attempts=$dispatchAttempt tag=$targetTag" + } + continue + } + + $dispatchReportPath = Join-Path $ScratchRoot "$ModeName-dispatch-$dispatchAttempt.json" + $dispatchInputs = @( + "release_tag=$targetTag", + 'allow_existing_tag=false', + "prerelease=$(([string]([bool]$modeConfig.prerelease)).ToLowerInvariant())", + "release_channel=$([string]$modeConfig.channel)" + ) - $watchConclusion = [string]$watchReport.conclusion - if ($watchConclusion -ne 'success') { - throw "release_watch_not_success: mode=$ModeName run_id=$([string]$dispatchReport.run_id) conclusion=$watchConclusion" + try { + & $dispatchWorkflowScript ` + -Repository $Repository ` + -WorkflowFile $ReleaseWorkflowFile ` + -Branch $Branch ` + -Inputs $dispatchInputs ` + -OutputPath $dispatchReportPath | Out-Null + $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + + $watchReportPath = Join-Path $ScratchRoot "$ModeName-watch-$dispatchAttempt.json" + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $Repository ` + -RunId ([string]$dispatchReport.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -OutputPath $watchReportPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "release_watch_failed: mode=$ModeName run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" + } + + $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $watchConclusion = [string]$watchReport.conclusion + if ($watchConclusion -ne 'success') { + throw 
"release_watch_not_success: mode=$ModeName run_id=$([string]$dispatchReport.run_id) conclusion=$watchConclusion" + } + + $dispatchRecord = [ordered]@{ + status = 'success' + workflow = $ReleaseWorkflowFile + branch = $Branch + run_id = [string]$dispatchReport.run_id + url = [string]$watchReport.url + conclusion = [string]$watchReport.conclusion + attempts = $dispatchAttempt + collision_retries = $collisionRetryCount + } + [void]$attemptHistory.Add([ordered]@{ + attempt = $dispatchAttempt + tag = $targetTag + status = 'success' + run_id = [string]$dispatchReport.run_id + run_url = [string]$watchReport.url + }) + break + } catch { + $dispatchError = [string]$_.Exception.Message + $existingAfterFailure = Get-ReleaseByTagOrNull -Repository $Repository -Tag $targetTag + if ($null -ne $existingAfterFailure) { + $collisionRetryCount++ + Write-Warning ("[release_tag_collision] mode={0} attempt={1} tag={2} observed after failure. Verifying existing release." -f $ModeName, $dispatchAttempt, $targetTag) + [void]$attemptHistory.Add([ordered]@{ + attempt = $dispatchAttempt + tag = $targetTag + status = 'collision_post_dispatch' + dispatch_error = $dispatchError + existing_release_url = [string]$existingAfterFailure.url + existing_release_published_at_utc = [string]$existingAfterFailure.publishedAt + }) + + try { + $releaseVerification = Verify-DispatchedRelease ` + -TargetTag $targetTag ` + -ExpectedChannel ([string]$modeConfig.channel) ` + -ExpectedIsPrerelease ([bool]$modeConfig.prerelease) ` + -ModeName $ModeName ` + -ScratchRoot $ScratchRoot + $dispatchRecord = [ordered]@{ + status = 'collision_resolved_existing_release' + workflow = $ReleaseWorkflowFile + branch = $Branch + run_id = '' + url = [string]$releaseVerification.release_url + conclusion = 'success' + attempts = $dispatchAttempt + collision_retries = $collisionRetryCount + reason_code = 'tag_already_published_by_peer' + } + break + } catch { + $verifyError = [string]$_.Exception.Message + if ($dispatchAttempt -ge 
$dispatchRetryMaxAttempts) { + throw "release_tag_collision_retry_exhausted: mode=$ModeName attempts=$dispatchAttempt tag=$targetTag last_error=$dispatchError verify_error=$verifyError" + } + continue + } + } + + throw + } } - $executionReport.dispatch = [ordered]@{ - status = 'success' - workflow = $ReleaseWorkflowFile - branch = $Branch - run_id = [string]$dispatchReport.run_id - url = [string]$watchReport.url - conclusion = [string]$watchReport.conclusion + if ($null -eq $dispatchRecord) { + throw "release_dispatch_attempts_exhausted: mode=$ModeName attempts=$dispatchAttempt tag=$targetTag" } - $executionReport.release_verification = Verify-DispatchedRelease ` - -TargetTag $targetTag ` - -ExpectedChannel ([string]$modeConfig.channel) ` - -ExpectedIsPrerelease ([bool]$modeConfig.prerelease) ` - -ModeName $ModeName ` - -ScratchRoot $ScratchRoot + $executionReport.target_release.dispatch_attempts = $dispatchAttempt + $executionReport.target_release.collision_retries = $collisionRetryCount + $executionReport.target_release.dispatch_attempt_history = @($attemptHistory) + $executionReport.dispatch = $dispatchRecord + + if ($null -eq $releaseVerification) { + $releaseVerification = Verify-DispatchedRelease ` + -TargetTag $targetTag ` + -ExpectedChannel ([string]$modeConfig.channel) ` + -ExpectedIsPrerelease ([bool]$modeConfig.prerelease) ` + -ModeName $ModeName ` + -ScratchRoot $ScratchRoot + } + $executionReport.release_verification = $releaseVerification $executionReport.promotion_lineage = Verify-PromotionLineage ` -ModeName $ModeName ` diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 2eed1cd..b542343 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -34,6 +34,9 @@ Describe 'Release control plane workflow contract' { It 'runs autonomous control-plane runtime and uploads report' { $script:workflowContent | Should 
-Match 'runs-on:\s*ubuntu-latest' + $script:workflowContent | Should -Match 'concurrency:' + $script:workflowContent | Should -Match 'group:\s*release-control-plane-\$\{\{\s*github\.repository\s*\}\}-\$\{\{\s*github\.ref_name\s*\}\}' + $script:workflowContent | Should -Match 'cancel-in-progress:\s*false' $script:workflowContent | Should -Match 'Enforce hosted-runner lock' $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' $script:workflowContent | Should -Match 'hosted_runner_required' @@ -53,6 +56,9 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match "ValidateSet\('Validate', 'CanaryCycle', 'PromotePrerelease', 'PromoteStable', 'FullCycle'\)" $script:runtimeContent | Should -Match 'Resolve-CanaryTargetSemVer' $script:runtimeContent | Should -Match 'Resolve-PromotedTargetSemVer' + $script:runtimeContent | Should -Match 'Get-ReleasePlanningState' + $script:runtimeContent | Should -Match 'Resolve-TargetPlanForMode' + $script:runtimeContent | Should -Match 'Get-ReleaseByTagOrNull' $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowPolicy' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowDecision' @@ -65,6 +71,9 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'semver_only_enforce_utc' $script:runtimeContent | Should -Match 'semver_only_enforcement_violation' $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' + $script:runtimeContent | Should -Match 'release_tag_collision_retry_exhausted' + $script:runtimeContent | Should -Match 'release_dispatch_attempts_exhausted' + $script:runtimeContent | Should -Match '\[release_tag_collision\]' $script:runtimeContent | Should -Match 'release_watch_not_success' $script:runtimeContent | Should -Match 'release_verification_asset_missing' $script:runtimeContent | Should -Match 
'release_verification_manifest_channel_mismatch' diff --git a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 index 78a22ff..bb11fd4 100644 --- a/tests/WorkspaceInstallerReleaseContract.Tests.ps1 +++ b/tests/WorkspaceInstallerReleaseContract.Tests.ps1 @@ -28,6 +28,9 @@ Describe 'Workspace installer release workflow contract' { $script:wrapperWorkflowContent | Should -Not -Match '(?m)^\s*push:' $script:wrapperWorkflowContent | Should -Not -Match '(?m)^\s*pull_request:' $script:wrapperWorkflowContent | Should -Not -Match '(?m)^\s*schedule:' + $script:wrapperWorkflowContent | Should -Match 'concurrency:' + $script:wrapperWorkflowContent | Should -Match 'group:\s*release-workspace-installer-\$\{\{\s*github\.repository\s*\}\}-\$\{\{\s*inputs\.release_tag\s*\}\}' + $script:wrapperWorkflowContent | Should -Match 'cancel-in-progress:\s*false' $script:wrapperWorkflowContent | Should -Match 'release_tag:' $script:wrapperWorkflowContent | Should -Match 'required:\s*true' $script:wrapperWorkflowContent | Should -Match 'type:\s*string' From 794fcdd25d37a88a1695b5558599b027e2b9efcc Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 23:24:52 -0800 Subject: [PATCH 43/60] Add release race-hardening drill workflow and contracts --- .github/workflows/ci.yml | 1 + .../release-race-hardening-drill.yml | 93 +++ AGENTS.md | 9 + README.md | 12 + .../runbooks/release-ops-incident-response.md | 36 ++ scripts/Invoke-ReleaseRaceHardeningDrill.ps1 | 598 ++++++++++++++++++ ...ceHardeningDrillWorkflowContract.Tests.ps1 | 61 ++ tests/ScopeAOpsRunbookContract.Tests.ps1 | 8 + tests/WorkspaceSurfaceContract.Tests.ps1 | 9 + 9 files changed, 827 insertions(+) create mode 100644 .github/workflows/release-race-hardening-drill.yml create mode 100644 scripts/Invoke-ReleaseRaceHardeningDrill.ps1 create mode 100644 tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml 
index 746cf45..4bdb36f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,6 +84,7 @@ jobs: './tests/OpsSloGateWorkflowContract.Tests.ps1', './tests/OpsPolicyDriftWorkflowContract.Tests.ps1', './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', + './tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1', './tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1', './tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1', './tests/WeeklyOpsSloReportWorkflowContract.Tests.ps1', diff --git a/.github/workflows/release-race-hardening-drill.yml b/.github/workflows/release-race-hardening-drill.yml new file mode 100644 index 0000000..6080ae6 --- /dev/null +++ b/.github/workflows/release-race-hardening-drill.yml @@ -0,0 +1,93 @@ +name: release-race-hardening-drill + +on: + schedule: + - cron: '40 9 * * 2' + workflow_dispatch: + inputs: + auto_remediate: + description: Allow release-control-plane to auto-remediate ops drift before canary dispatch. + required: false + default: true + type: boolean + keep_latest_canary_n: + description: Number of latest SemVer canary tags to keep after control-plane canary publish. + required: false + default: '1' + type: string + watch_timeout_minutes: + description: Timeout minutes for contender and control-plane workflow watches. + required: false + default: '120' + type: string + +permissions: + contents: read + actions: write + +jobs: + release-race-hardening-drill: + name: Release Race Hardening Drill + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Enforce hosted-runner lock + shell: pwsh + run: | + $ErrorActionPreference = 'Stop' + $runnerEnvironment = [string]$env:RUNNER_ENVIRONMENT + if ($runnerEnvironment -ne 'github-hosted') { + throw "hosted_runner_required: release-race-hardening-drill must run on a github-hosted runner. 
actual='$runnerEnvironment'" + } + + - name: Execute release race-hardening drill + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-drill-report.json' + + $keepLatestCanaryNText = [string]'${{ inputs.keep_latest_canary_n }}' + $keepLatestCanaryN = 1 + if (-not [string]::IsNullOrWhiteSpace($keepLatestCanaryNText)) { + $parsedKeepLatestCanaryN = 0 + if (-not [int]::TryParse($keepLatestCanaryNText, [ref]$parsedKeepLatestCanaryN)) { + throw "keep_latest_canary_n must be an integer. actual='$keepLatestCanaryNText'" + } + $keepLatestCanaryN = $parsedKeepLatestCanaryN + } + + $watchTimeoutText = [string]'${{ inputs.watch_timeout_minutes }}' + $watchTimeoutMinutes = 120 + if (-not [string]::IsNullOrWhiteSpace($watchTimeoutText)) { + $parsedWatchTimeoutMinutes = 0 + if (-not [int]::TryParse($watchTimeoutText, [ref]$parsedWatchTimeoutMinutes)) { + throw "watch_timeout_minutes must be an integer. 
actual='$watchTimeoutText'" + } + $watchTimeoutMinutes = $parsedWatchTimeoutMinutes + } + + $autoRemediateText = [string]'${{ inputs.auto_remediate }}' + $autoRemediate = $true + if (-not [string]::IsNullOrWhiteSpace($autoRemediateText)) { + $autoRemediate = [System.Convert]::ToBoolean($autoRemediateText) + } + + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseRaceHardeningDrill.ps1 ` + -Repository '${{ github.repository }}' ` + -Branch 'main' ` + -AutoRemediate:$autoRemediate ` + -KeepLatestCanaryN $keepLatestCanaryN ` + -WatchTimeoutMinutes $watchTimeoutMinutes ` + -OutputPath $reportPath + + - name: Upload release race-hardening drill report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-race-hardening-drill-report-${{ github.run_id }} + path: ${{ runner.temp }}/release-race-hardening-drill-report.json + if-no-files-found: error diff --git a/AGENTS.md b/AGENTS.md index 7e395e2..9e881b6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -301,6 +301,15 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `rollback_candidate_missing` - `rollback_assets_missing` - `rollback_drill_runtime_error` +- `.github/workflows/release-race-hardening-drill.yml` must run `scripts/Invoke-ReleaseRaceHardeningDrill.ps1`. +- Race-hardening drill must dispatch both `release-workspace-installer.yml` (contender) and `release-control-plane.yml` (`mode=CanaryCycle`, `dry_run=false`) and validate collision handling using control-plane artifact evidence. +- Race-hardening drill reason codes must remain explicit: + - `drill_passed` + - `control_plane_collision_not_observed` + - `control_plane_report_download_failed` + - `control_plane_report_missing` + - `control_plane_run_failed` + - `race_hardening_drill_runtime_error` - Operational incident handling runbook is `docs/runbooks/release-ops-incident-response.md`. 
## Integration Gate Policy diff --git a/README.md b/README.md index 8b1299d..c0b711b 100644 --- a/README.md +++ b/README.md @@ -466,6 +466,18 @@ Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` still em - `rollback_candidate_missing` - `rollback_assets_missing` +`release-race-hardening-drill.yml` is scheduled weekly and supports manual dispatch. It runs `scripts/Invoke-ReleaseRaceHardeningDrill.ps1` to prove release-tag collision handling under parallel dispatch pressure: +- dispatches a contender `release-workspace-installer.yml` run at predicted next SemVer canary tag +- dispatches `release-control-plane.yml` in `CanaryCycle` mode immediately after +- watches both runs and downloads `release-control-plane-report-<run_id>` artifact +- requires collision evidence in control-plane execution (`collision_retries >= 1` and/or collision attempt statuses) +- requires release verification evidence from control-plane report (`release_verification.status=pass`) +- deterministic failure reason codes include: + - `control_plane_collision_not_observed` + - `control_plane_report_download_failed` + - `control_plane_report_missing` + - `control_plane_run_failed` + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 0e19fac..56eb788 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -234,9 +234,45 @@ gh workflow run release-rollback-drill.yml -R LabVIEW-Community-CI-CD/labview-cd -f auto_self_heal=false ``` +## Release Race-Hardening Drill Dispatch +Run collision-retry verification drill on the canary release lane: + +```powershell +gh workflow run release-race-hardening-drill.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f auto_remediate=true ` + -f keep_latest_canary_n=1 ` + -f watch_timeout_minutes=120 +```
+ +Run the same drill directly from the repo: + +```powershell +Set-Location D:\dev\labview-cdev-surface-fork +pwsh -File .\scripts\Invoke-ReleaseRaceHardeningDrill.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -Branch main ` + -AutoRemediate:$true ` + -KeepLatestCanaryN 1 ` + -WatchTimeoutMinutes 120 +``` + +Expected pass evidence in `release-race-hardening-drill-report.json`: +- `reason_code=drill_passed` +- `evidence.collision_observed=true` +- `evidence.collision_signals` includes at least one collision marker (`collision_retries_ge_1`, `attempt_status_collision_*`, or `dispatch_status_collision_*`) +- `artifacts.control_plane_report_artifact` is `release-control-plane-report-<run_id>` +- `evidence.release_verification_status=pass` + +Deterministic drill failure reason codes: +- `control_plane_collision_not_observed` +- `control_plane_report_download_failed` +- `control_plane_report_missing` +- `control_plane_run_failed` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` - `release-control-plane-override-audit.json` (when override is requested/applied) +- `release-race-hardening-drill-report.json` - sync guard run URL - parity SHAs (upstream and fork) diff --git a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 new file mode 100644 index 0000000..872ee6b --- /dev/null +++ b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 @@ -0,0 +1,598 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$ReleaseWorkflowFile = 'release-workspace-installer.yml', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$ControlPlaneWorkflowFile =
'release-control-plane.yml', + + [Parameter()] + [ValidateRange(20, 200)] + [int]$ReleaseLimit = 100, + + [Parameter()] + [ValidateRange(5, 240)] + [int]$WatchTimeoutMinutes = 120, + + [Parameter()] + [ValidateRange(1, 10)] + [int]$KeepLatestCanaryN = 1, + + [Parameter()] + [bool]$AutoRemediate = $true, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' +$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' + +foreach ($requiredScript in @($dispatchWorkflowScript, $watchWorkflowScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +function Add-UniqueMessage { + param( + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$Message + ) + + if (-not $Target.Contains($Message)) { + [void]$Target.Add($Message) + } +} + +function Resolve-RaceDrillFailureReasonCode { + param([Parameter()][string]$MessageText = '') + + $message = [string]$MessageText + if ($message -match '^required_script_missing') { return 'required_script_missing' } + if ($message -match '^contender_release_dispatch_failed') { return 'contender_release_dispatch_failed' } + if ($message -match '^control_plane_dispatch_failed') { return 'control_plane_dispatch_failed' } + if ($message -match '^control_plane_run_failed') { return 'control_plane_run_failed' } + if ($message -match '^control_plane_report_download_failed') { return 'control_plane_report_download_failed' } + if ($message -match '^control_plane_report_missing') { return 'control_plane_report_missing' } + if ($message -match '^control_plane_report_failed') { return 'control_plane_report_failed' } + if ($message -match 
'^control_plane_canary_execution_missing') { return 'control_plane_canary_execution_missing' } + if ($message -match '^control_plane_release_verification_missing') { return 'control_plane_release_verification_missing' } + if ($message -match '^control_plane_release_verification_failed') { return 'control_plane_release_verification_failed' } + if ($message -match '^control_plane_collision_not_observed') { return 'control_plane_collision_not_observed' } + if ($message -match '^gh_command_failed') { return 'gh_command_failed' } + + return 'race_hardening_drill_runtime_error' +} + +function Get-SequenceFromLabel { + param( + [Parameter(Mandatory = $true)][string]$Label, + [Parameter(Mandatory = $true)][string]$Token + ) + + $pattern = "(?i)(?:^|[.-]){0}[.-](?<n>\d+)(?:$|[.-])" -f [regex]::Escape($Token) + $match = [regex]::Match($Label, $pattern) + if (-not $match.Success) { + return 0 + } + + $value = 0 + if (-not [int]::TryParse([string]$match.Groups['n'].Value, [ref]$value)) { + return 0 + } + + return $value +} + +function Parse-ReleaseTagRecord { + param( + [Parameter(Mandatory = $true)][string]$TagName, + [Parameter(Mandatory = $true)][bool]$IsPrerelease + ) + + $semverMatch = [regex]::Match( + $TagName, + '^v(?<major>0|[1-9]\d*)\.(?<minor>0|[1-9]\d*)\.(?<patch>0|[1-9]\d*)(?:-(?<prerelease>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?$' + ) + if (-not $semverMatch.Success) { + return $null + } + + $prereleaseLabel = [string]$semverMatch.Groups['prerelease'].Value + $channel = 'stable' + $sequence = 0 + if (-not [string]::IsNullOrWhiteSpace($prereleaseLabel)) { + if ($prereleaseLabel -match '(?i)(^|[.\-])canary([.\-]|$)') { + $channel = 'canary' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'canary' + } else { + $channel = 'prerelease' + $sequence = Get-SequenceFromLabel -Label $prereleaseLabel -Token 'rc' + } + } + + return [pscustomobject]@{ + tag_name = $TagName + tag_family = 'semver' + channel = $channel + major =
[int]$semverMatch.Groups['major'].Value + minor = [int]$semverMatch.Groups['minor'].Value + patch = [int]$semverMatch.Groups['patch'].Value + prerelease_sequence = $sequence + is_prerelease = $IsPrerelease + } +} + +function New-CoreVersion { + param( + [Parameter(Mandatory = $true)][int]$Major, + [Parameter(Mandatory = $true)][int]$Minor, + [Parameter(Mandatory = $true)][int]$Patch + ) + + return [pscustomobject]@{ + major = $Major + minor = $Minor + patch = $Patch + } +} + +function Format-CoreVersion { + param([Parameter(Mandatory = $true)]$Core) + return "{0}.{1}.{2}" -f [int]$Core.major, [int]$Core.minor, [int]$Core.patch +} + +function Compare-CoreVersion { + param( + [Parameter(Mandatory = $true)]$Left, + [Parameter(Mandatory = $true)]$Right + ) + + foreach ($part in @('major', 'minor', 'patch')) { + $leftValue = [int]$Left.$part + $rightValue = [int]$Right.$part + if ($leftValue -gt $rightValue) { return 1 } + if ($leftValue -lt $rightValue) { return -1 } + } + + return 0 +} + +function Get-MaxCoreVersion { + param([Parameter(Mandatory = $true)][AllowEmptyCollection()][object[]]$Records = @()) + + $maxCore = $null + foreach ($record in @($Records)) { + $candidate = New-CoreVersion -Major ([int]$record.major) -Minor ([int]$record.minor) -Patch ([int]$record.patch) + if ($null -eq $maxCore) { + $maxCore = $candidate + continue + } + + if ((Compare-CoreVersion -Left $candidate -Right $maxCore) -gt 0) { + $maxCore = $candidate + } + } + + return $maxCore +} + +function Get-NextSemVerCanaryTag { + param( + [Parameter(Mandatory = $true)][string]$TargetRepository, + [Parameter(Mandatory = $true)][int]$MaxReleases + ) + + $releases = @(Get-GhReleasesPortable -Repository $TargetRepository -Limit $MaxReleases -ExcludeDrafts) + $semverRecords = @( + $releases | + ForEach-Object { Parse-ReleaseTagRecord -TagName ([string]$_.tagName) -IsPrerelease ([bool]$_.isPrerelease) } | + Where-Object { $null -ne $_ -and [string]$_.tag_family -eq 'semver' } + ) + + $stableSemver = 
@($semverRecords | Where-Object { [string]$_.channel -eq 'stable' }) + $nonStableSemver = @($semverRecords | Where-Object { [string]$_.channel -eq 'canary' -or [string]$_.channel -eq 'prerelease' }) + + $latestStableCore = Get-MaxCoreVersion -Records $stableSemver + $latestNonStableCore = Get-MaxCoreVersion -Records $nonStableSemver + + $targetCore = $null + if ($null -ne $latestNonStableCore -and (($null -eq $latestStableCore) -or ((Compare-CoreVersion -Left $latestNonStableCore -Right $latestStableCore) -gt 0))) { + $targetCore = $latestNonStableCore + } elseif ($null -ne $latestStableCore) { + $targetCore = New-CoreVersion -Major ([int]$latestStableCore.major) -Minor ([int]$latestStableCore.minor) -Patch ([int]$latestStableCore.patch + 1) + } elseif ($null -ne $latestNonStableCore) { + $targetCore = $latestNonStableCore + } else { + $targetCore = New-CoreVersion -Major 0 -Minor 1 -Patch 0 + } + + $matchedCanary = @( + $semverRecords | + Where-Object { + ([string]$_.channel -eq 'canary') -and + ([int]$_.major -eq [int]$targetCore.major) -and + ([int]$_.minor -eq [int]$targetCore.minor) -and + ([int]$_.patch -eq [int]$targetCore.patch) + } | + ForEach-Object { [int]$_.prerelease_sequence } + ) + + $nextCanarySequence = if (@($matchedCanary).Count -eq 0) { + 1 + } else { + ((@($matchedCanary) | Measure-Object -Maximum).Maximum + 1) + } + if ($nextCanarySequence -gt 9999) { + throw "semver_prerelease_sequence_exhausted: channel=canary core=$(Format-CoreVersion -Core $targetCore) next_sequence=$nextCanarySequence" + } + + return [ordered]@{ + tag_family = 'semver' + core = Format-CoreVersion -Core $targetCore + prerelease_sequence = $nextCanarySequence + tag = "v$(Format-CoreVersion -Core $targetCore)-canary.$nextCanarySequence" + } +} + +function Invoke-WorkflowWatchCapture { + param( + [Parameter(Mandatory = $true)][string]$TargetRepository, + [Parameter(Mandatory = $true)][string]$RunId, + [Parameter(Mandatory = $true)][int]$TimeoutMinutes, + [Parameter(Mandatory 
= $true)][string]$ReportPath + ) + + $runtimeError = '' + $exitCode = 1 + try { + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $TargetRepository ` + -RunId $RunId ` + -TimeoutMinutes $TimeoutMinutes ` + -OutputPath $ReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $runtimeError = [string]$_.Exception.Message + $exitCode = 1 + } + + $watchReport = $null + if (Test-Path -LiteralPath $ReportPath -PathType Leaf) { + $watchReport = Get-Content -LiteralPath $ReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if ($null -eq $watchReport) { + $watchReport = [pscustomobject]@{ + run_id = $RunId + status = 'unknown' + conclusion = '' + url = '' + classified_reason = 'watch_report_missing' + } + } elseif (-not [string]::IsNullOrWhiteSpace($runtimeError)) { + if ([string]::IsNullOrWhiteSpace([string]$watchReport.classified_reason)) { + $watchReport | Add-Member -NotePropertyName classified_reason -NotePropertyValue 'watch_runtime_error' -Force + } + } + + $successful = ($exitCode -eq 0 -and [string]$watchReport.conclusion -eq 'success') + return [ordered]@{ + successful = [bool]$successful + exit_code = $exitCode + runtime_error = $runtimeError + report = $watchReport + } +} + +$scratchRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("release-race-hardening-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + branch = $Branch + release_workflow = $ReleaseWorkflowFile + control_plane_workflow = $ControlPlaneWorkflowFile + release_limit = $ReleaseLimit + watch_timeout_minutes = $WatchTimeoutMinutes + auto_remediate = [bool]$AutoRemediate + keep_latest_canary_n = $KeepLatestCanaryN + predicted_canary_tag = '' + predicted_canary_core = '' + predicted_canary_sequence = 0 + status = 'fail' + reason_code = '' + message = '' + 
warnings = @() + dispatches = [ordered]@{ + contender_release = $null + control_plane = $null + } + watches = [ordered]@{ + contender_release = $null + control_plane = $null + } + artifacts = [ordered]@{ + control_plane_report_artifact = '' + control_plane_report_path = '' + } + control_plane_report_summary = [ordered]@{ + status = '' + reason_code = '' + mode = '' + message = '' + } + evidence = [ordered]@{ + dispatch_gap_seconds = 0 + collision_observed = $false + collision_signals = @() + collision_retries = 0 + predicted_target_tag = '' + final_target_tag = '' + dispatch_status = '' + dispatch_reason_code = '' + attempt_history_statuses = @() + release_verification_status = '' + release_verification_url = '' + } +} + +$warnings = [System.Collections.Generic.List[string]]::new() +$collisionSignals = [System.Collections.Generic.List[string]]::new() + +try { + $targetTagRecord = Get-NextSemVerCanaryTag -TargetRepository $Repository -MaxReleases $ReleaseLimit + $report.predicted_canary_tag = [string]$targetTagRecord.tag + $report.predicted_canary_core = [string]$targetTagRecord.core + $report.predicted_canary_sequence = [int]$targetTagRecord.prerelease_sequence + $report.evidence.predicted_target_tag = [string]$targetTagRecord.tag + + $contenderDispatchPath = Join-Path $scratchRoot 'contender-release-dispatch.json' + $contenderDispatchInputs = @( + "release_tag=$([string]$targetTagRecord.tag)", + 'allow_existing_tag=false', + 'prerelease=true', + 'release_channel=canary' + ) + & $dispatchWorkflowScript ` + -Repository $Repository ` + -WorkflowFile $ReleaseWorkflowFile ` + -Branch $Branch ` + -Inputs $contenderDispatchInputs ` + -OutputPath $contenderDispatchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "contender_release_dispatch_failed: workflow=$ReleaseWorkflowFile exit_code=$LASTEXITCODE" + } + $contenderDispatch = Get-Content -LiteralPath $contenderDispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $report.dispatches.contender_release = [ordered]@{ 
+ run_id = [string]$contenderDispatch.run_id + head_sha = [string]$contenderDispatch.head_sha + status = [string]$contenderDispatch.status + url = [string]$contenderDispatch.url + inputs = @($contenderDispatch.inputs | ForEach-Object { [string]$_ }) + timestamp_utc = [string]$contenderDispatch.timestamp_utc + } + + $controlPlaneDispatchPath = Join-Path $scratchRoot 'control-plane-dispatch.json' + $controlPlaneDispatchInputs = @( + 'mode=CanaryCycle', + "auto_remediate=$(([string]$AutoRemediate).ToLowerInvariant())", + "keep_latest_canary_n=$KeepLatestCanaryN", + 'dry_run=false' + ) + & $dispatchWorkflowScript ` + -Repository $Repository ` + -WorkflowFile $ControlPlaneWorkflowFile ` + -Branch $Branch ` + -Inputs $controlPlaneDispatchInputs ` + -OutputPath $controlPlaneDispatchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "control_plane_dispatch_failed: workflow=$ControlPlaneWorkflowFile exit_code=$LASTEXITCODE" + } + $controlPlaneDispatch = Get-Content -LiteralPath $controlPlaneDispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $report.dispatches.control_plane = [ordered]@{ + run_id = [string]$controlPlaneDispatch.run_id + head_sha = [string]$controlPlaneDispatch.head_sha + status = [string]$controlPlaneDispatch.status + url = [string]$controlPlaneDispatch.url + inputs = @($controlPlaneDispatch.inputs | ForEach-Object { [string]$_ }) + timestamp_utc = [string]$controlPlaneDispatch.timestamp_utc + } + + $contenderDispatchedAt = [DateTimeOffset]::MinValue + $controlPlaneDispatchedAt = [DateTimeOffset]::MinValue + $hasContenderTimestamp = [DateTimeOffset]::TryParse([string]$contenderDispatch.timestamp_utc, [ref]$contenderDispatchedAt) + $hasControlPlaneTimestamp = [DateTimeOffset]::TryParse([string]$controlPlaneDispatch.timestamp_utc, [ref]$controlPlaneDispatchedAt) + if ($hasContenderTimestamp -and $hasControlPlaneTimestamp) { + $gapSeconds = [Math]::Abs(($controlPlaneDispatchedAt - $contenderDispatchedAt).TotalSeconds) + 
$report.evidence.dispatch_gap_seconds = [Math]::Round($gapSeconds, 3) + } + + $contenderWatchPath = Join-Path $scratchRoot 'contender-release-watch.json' + $contenderWatch = Invoke-WorkflowWatchCapture ` + -TargetRepository $Repository ` + -RunId ([string]$contenderDispatch.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -ReportPath $contenderWatchPath + $report.watches.contender_release = [ordered]@{ + run_id = [string]$contenderWatch.report.run_id + status = [string]$contenderWatch.report.status + conclusion = [string]$contenderWatch.report.conclusion + classified_reason = [string]$contenderWatch.report.classified_reason + url = [string]$contenderWatch.report.url + successful = [bool]$contenderWatch.successful + exit_code = [int]$contenderWatch.exit_code + runtime_error = [string]$contenderWatch.runtime_error + } + if (-not [bool]$contenderWatch.successful) { + Add-UniqueMessage -Target $warnings -Message "contender_watch_non_success: run_id=$([string]$contenderWatch.report.run_id) conclusion=$([string]$contenderWatch.report.conclusion)" + } + + $controlPlaneWatchPath = Join-Path $scratchRoot 'control-plane-watch.json' + $controlPlaneWatch = Invoke-WorkflowWatchCapture ` + -TargetRepository $Repository ` + -RunId ([string]$controlPlaneDispatch.run_id) ` + -TimeoutMinutes $WatchTimeoutMinutes ` + -ReportPath $controlPlaneWatchPath + $report.watches.control_plane = [ordered]@{ + run_id = [string]$controlPlaneWatch.report.run_id + status = [string]$controlPlaneWatch.report.status + conclusion = [string]$controlPlaneWatch.report.conclusion + classified_reason = [string]$controlPlaneWatch.report.classified_reason + url = [string]$controlPlaneWatch.report.url + successful = [bool]$controlPlaneWatch.successful + exit_code = [int]$controlPlaneWatch.exit_code + runtime_error = [string]$controlPlaneWatch.runtime_error + } + if (-not [bool]$controlPlaneWatch.successful) { + $controlPlaneConclusion = [string]$controlPlaneWatch.report.conclusion + throw 
"control_plane_run_failed: run_id=$([string]$controlPlaneDispatch.run_id) conclusion=$controlPlaneConclusion" + } + + $controlPlaneRunId = [string]$controlPlaneDispatch.run_id + $controlPlaneArtifactName = "release-control-plane-report-$controlPlaneRunId" + $report.artifacts.control_plane_report_artifact = $controlPlaneArtifactName + $artifactRoot = Join-Path $scratchRoot 'control-plane-report-artifact' + New-Item -Path $artifactRoot -ItemType Directory -Force | Out-Null + + & gh run download $controlPlaneRunId -R $Repository -n $controlPlaneArtifactName -D $artifactRoot + $downloadExit = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($downloadExit -ne 0) { + throw "control_plane_report_download_failed: run_id=$controlPlaneRunId artifact=$controlPlaneArtifactName exit_code=$downloadExit" + } + + $controlPlaneReportPath = @( + Get-ChildItem -Path $artifactRoot -Recurse -File -Filter 'release-control-plane-report.json' | + Select-Object -First 1 -ExpandProperty FullName + ) + if (@($controlPlaneReportPath).Count -ne 1 -or [string]::IsNullOrWhiteSpace([string]$controlPlaneReportPath[0])) { + throw "control_plane_report_missing: run_id=$controlPlaneRunId artifact=$controlPlaneArtifactName" + } + $report.artifacts.control_plane_report_path = [string]$controlPlaneReportPath[0] + + $controlPlaneReport = Get-Content -LiteralPath ([string]$controlPlaneReportPath[0]) -Raw | ConvertFrom-Json -Depth 100 + $report.control_plane_report_summary = [ordered]@{ + status = [string]$controlPlaneReport.status + reason_code = [string]$controlPlaneReport.reason_code + mode = [string]$controlPlaneReport.mode + message = [string]$controlPlaneReport.message + } + if ([string]$controlPlaneReport.status -ne 'pass') { + throw "control_plane_report_failed: reason_code=$([string]$controlPlaneReport.reason_code) message=$([string]$controlPlaneReport.message)" + } + + $canaryExecution = @( + @($controlPlaneReport.executions) | + Where-Object { + [string]$_.target_release.mode 
-eq 'CanaryCycle' -or + [string]$_.target_release.channel -eq 'canary' + } | + Select-Object -First 1 + ) + if (@($canaryExecution).Count -ne 1) { + throw 'control_plane_canary_execution_missing: canary execution record not found in control-plane report.' + } + + $targetRelease = $canaryExecution[0].target_release + $dispatchRecord = $canaryExecution[0].dispatch + $releaseVerification = $canaryExecution[0].release_verification + + if ($null -eq $releaseVerification) { + throw 'control_plane_release_verification_missing: canary execution missing release_verification payload.' + } + if ([string]$releaseVerification.status -ne 'pass') { + throw "control_plane_release_verification_failed: status=$([string]$releaseVerification.status)" + } + + $attemptHistoryStatuses = @($targetRelease.dispatch_attempt_history | ForEach-Object { [string]$_.status }) + $collisionRetries = 0 + [void][int]::TryParse([string]$targetRelease.collision_retries, [ref]$collisionRetries) + if ($collisionRetries -ge 1) { + Add-UniqueMessage -Target $collisionSignals -Message 'collision_retries_ge_1' + } + + $dispatchStatus = [string]$dispatchRecord.status + if ($dispatchStatus -like 'collision_*') { + Add-UniqueMessage -Target $collisionSignals -Message ("dispatch_status_{0}" -f $dispatchStatus) + } + + $dispatchReasonCode = [string]$dispatchRecord.reason_code + if ($dispatchReasonCode -eq 'tag_already_published_by_peer') { + Add-UniqueMessage -Target $collisionSignals -Message ("dispatch_reason_{0}" -f $dispatchReasonCode) + } + + foreach ($attemptStatus in @($attemptHistoryStatuses)) { + if ([string]$attemptStatus -like 'collision_*') { + Add-UniqueMessage -Target $collisionSignals -Message ("attempt_status_{0}" -f [string]$attemptStatus) + } + } + + $targetTag = [string]$targetRelease.tag + if (-not [string]::Equals($targetTag, [string]$targetTagRecord.tag, [System.StringComparison]::Ordinal)) { + Add-UniqueMessage -Target $warnings -Message ("target_tag_replanned: predicted={0} final={1}" -f 
[string]$targetTagRecord.tag, $targetTag) + } + + $manifestProvenanceAssets = @($releaseVerification.manifest_provenance_assets_checked | ForEach-Object { [string]$_ }) + if ($manifestProvenanceAssets -notcontains 'reproducibility-report.json') { + throw 'control_plane_release_verification_failed: release verification did not report reproducibility-report.json provenance check.' + } + + $report.evidence = [ordered]@{ + dispatch_gap_seconds = [double]$report.evidence.dispatch_gap_seconds + collision_observed = (@($collisionSignals).Count -gt 0) + collision_signals = @($collisionSignals) + collision_retries = $collisionRetries + predicted_target_tag = [string]$targetTagRecord.tag + final_target_tag = $targetTag + dispatch_status = $dispatchStatus + dispatch_reason_code = $dispatchReasonCode + attempt_history_statuses = @($attemptHistoryStatuses) + release_verification_status = [string]$releaseVerification.status + release_verification_url = [string]$releaseVerification.release_url + } + + if (-not [bool]$report.evidence.collision_observed) { + throw ("control_plane_collision_not_observed: predicted_tag={0} final_tag={1} dispatch_status={2} collision_retries={3}" -f [string]$targetTagRecord.tag, $targetTag, $dispatchStatus, $collisionRetries) + } + + $report.status = 'pass' + $report.reason_code = 'drill_passed' + $report.message = 'Race-hardening drill passed with collision evidence and verified canary release metadata.' 
+} +catch { + $report.status = 'fail' + $report.message = [string]$_.Exception.Message + $report.reason_code = Resolve-RaceDrillFailureReasonCode -MessageText $report.message +} +finally { + $report.warnings = @($warnings) + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..7d0553b --- /dev/null +++ b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 @@ -0,0 +1,61 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release race-hardening drill workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-race-hardening-drill.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRaceHardeningDrill.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Release race-hardening contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with bounded drill controls' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'auto_remediate' + $script:workflowContent | Should -Match 'keep_latest_canary_n' + $script:workflowContent | Should -Match 'watch_timeout_minutes' + } + + It 'runs on 
hosted runner, executes drill runtime, and uploads report artifact' { + $script:workflowContent | Should -Match 'runs-on:\s*ubuntu-latest' + $script:workflowContent | Should -Match 'Enforce hosted-runner lock' + $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' + $script:workflowContent | Should -Match 'hosted_runner_required' + $script:workflowContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:workflowContent | Should -Match 'release-race-hardening-drill-report\.json' + $script:workflowContent | Should -Match 'actions:\s*write' + } + + It 'dispatches contender and control-plane workflows then verifies collision evidence from control-plane report artifact' { + $script:runtimeContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' + $script:runtimeContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:runtimeContent | Should -Match 'release-workspace-installer\.yml' + $script:runtimeContent | Should -Match 'release-control-plane\.yml' + $script:runtimeContent | Should -Match 'mode=CanaryCycle' + $script:runtimeContent | Should -Match 'release-control-plane-report-' + $script:runtimeContent | Should -Match 'gh run download' + $script:runtimeContent | Should -Match 'control_plane_collision_not_observed' + $script:runtimeContent | Should -Match 'collision_retries' + $script:runtimeContent | Should -Match 'tag_already_published_by_peer' + $script:runtimeContent | Should -Match 'reproducibility-report\.json' + $script:runtimeContent | Should -Match 'drill_passed' + } + + It 'computes semver canary target tags deterministically' { + $script:runtimeContent | Should -Match 'Get-NextSemVerCanaryTag' + $script:runtimeContent | Should -Match "tag_family = 'semver'" + $script:runtimeContent | Should -Match '-canary\.' 
+ $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' + } +} diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index d848d71..f6fd103 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -30,9 +30,13 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'ops-slo-gate\.yml' $script:runbookContent | Should -Match 'ops-policy-drift-check\.yml' $script:runbookContent | Should -Match 'release-rollback-drill\.yml' + $script:runbookContent | Should -Match 'release-race-hardening-drill\.yml' + $script:runbookContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' $script:runbookContent | Should -Match 'auto_self_heal=false' $script:runbookContent | Should -Match '20260226' $script:runbookContent | Should -Match 'release_verification_failed' + $script:runbookContent | Should -Match 'control_plane_collision_not_observed' + $script:runbookContent | Should -Match 'drill_passed' $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'stable_window_override_invalid' $script:runbookContent | Should -Match 'release-manifest\.json' @@ -49,8 +53,10 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' + $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' + $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' $script:readmeContent | Should -Match 'release-ops-incident-response\.md' $script:agentsContent | Should -Match 'Ops Monitoring Policy' @@ -60,7 +66,9 @@ Describe 'Scope A ops runbook contract' { 
$script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' + $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' + $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' } } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 1efaac6..3eae4b0 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -36,6 +36,7 @@ Describe 'Workspace surface contract' { $script:opsPolicyDriftScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseControlPlanePolicyDrift.ps1' $script:rollbackDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' $script:rollbackSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' + $script:raceHardeningDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRaceHardeningDrill.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' $script:windowsContainerNsisSelfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' $script:windowsContainerNsisDockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' @@ -54,6 +55,7 @@ Describe 'Workspace surface contract' { $script:opsSloGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' $script:opsPolicyDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' $script:rollbackDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' + $script:raceHardeningDrillWorkflowPath = 
Join-Path $script:repoRoot '.github/workflows/release-race-hardening-drill.yml' $script:linuxNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-linux-nsis-parity-image.yml' $script:windowsNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-windows-nsis-parity-image.yml' $script:windowsImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/windows-labview-image-gate.yml' @@ -103,6 +105,7 @@ Describe 'Workspace surface contract' { $script:opsPolicyDriftScriptPath, $script:rollbackDrillScriptPath, $script:rollbackSelfHealingScriptPath, + $script:raceHardeningDrillScriptPath, $script:dockerLinuxIterationScriptPath, $script:windowsContainerNsisSelfTestScriptPath, $script:windowsContainerNsisDockerfilePath, @@ -121,6 +124,7 @@ Describe 'Workspace surface contract' { $script:opsSloGateWorkflowPath, $script:opsPolicyDriftWorkflowPath, $script:rollbackDrillWorkflowPath, + $script:raceHardeningDrillWorkflowPath, $script:linuxNsisParityImagePublishWorkflowPath, $script:windowsNsisParityImagePublishWorkflowPath, $script:windowsImageGateWorkflowPath, @@ -392,8 +396,10 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' + $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' + $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' $script:agentsContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:agentsContent | Should -Match 'workflow_failure_detected' $script:agentsContent | Should -Match 'release_client_drift' @@ -429,8 +435,10 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should 
-Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' + $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' + $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' $script:readmeContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:readmeContent | Should -Match 'workflow_failure_detected' $script:readmeContent | Should -Match 'release_client_drift' @@ -478,6 +486,7 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'OpsIncidentLifecycleContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'OpsSloGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'OpsPolicyDriftWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'ReleaseRaceHardeningDrillWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'ReleaseRollbackDrillWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxLabviewImageGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxContainerNsisParityContract\.Tests\.ps1' From 117459fbfc6857e31f2f209e2fb7947c94a1945e Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 23:31:41 -0800 Subject: [PATCH 44/60] Harden race drill for optional dispatch fields --- scripts/Invoke-ReleaseRaceHardeningDrill.ps1 | 36 +++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 index 872ee6b..b822b64 100644 --- a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 +++ b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 @@ -61,6 +61,25 @@ function Add-UniqueMessage { } } +function 
Get-OptionalPropertyValue { + param( + [Parameter()][AllowNull()]$Object, + [Parameter(Mandatory = $true)][string]$Name, + [Parameter()][AllowNull()]$DefaultValue = $null + ) + + if ($null -eq $Object) { + return $DefaultValue + } + + $property = $Object.PSObject.Properties[$Name] + if ($null -eq $property) { + return $DefaultValue + } + + return $property.Value +} + function Resolve-RaceDrillFailureReasonCode { param([Parameter()][string]$MessageText = '') @@ -523,19 +542,26 @@ try { throw "control_plane_release_verification_failed: status=$([string]$releaseVerification.status)" } - $attemptHistoryStatuses = @($targetRelease.dispatch_attempt_history | ForEach-Object { [string]$_.status }) + $attemptHistory = @(Get-OptionalPropertyValue -Object $targetRelease -Name 'dispatch_attempt_history' -DefaultValue @()) + $attemptHistoryStatuses = @( + $attemptHistory | + ForEach-Object { + [string](Get-OptionalPropertyValue -Object $_ -Name 'status' -DefaultValue '') + } | + Where-Object { -not [string]::IsNullOrWhiteSpace([string]$_) } + ) $collisionRetries = 0 - [void][int]::TryParse([string]$targetRelease.collision_retries, [ref]$collisionRetries) + [void][int]::TryParse([string](Get-OptionalPropertyValue -Object $targetRelease -Name 'collision_retries' -DefaultValue 0), [ref]$collisionRetries) if ($collisionRetries -ge 1) { Add-UniqueMessage -Target $collisionSignals -Message 'collision_retries_ge_1' } - $dispatchStatus = [string]$dispatchRecord.status + $dispatchStatus = [string](Get-OptionalPropertyValue -Object $dispatchRecord -Name 'status' -DefaultValue '') if ($dispatchStatus -like 'collision_*') { Add-UniqueMessage -Target $collisionSignals -Message ("dispatch_status_{0}" -f $dispatchStatus) } - $dispatchReasonCode = [string]$dispatchRecord.reason_code + $dispatchReasonCode = [string](Get-OptionalPropertyValue -Object $dispatchRecord -Name 'reason_code' -DefaultValue '') if ($dispatchReasonCode -eq 'tag_already_published_by_peer') { Add-UniqueMessage -Target 
$collisionSignals -Message ("dispatch_reason_{0}" -f $dispatchReasonCode) } @@ -546,7 +572,7 @@ try { } } - $targetTag = [string]$targetRelease.tag + $targetTag = [string](Get-OptionalPropertyValue -Object $targetRelease -Name 'tag' -DefaultValue '') if (-not [string]::Equals($targetTag, [string]$targetTagRecord.tag, [System.StringComparison]::Ordinal)) { Add-UniqueMessage -Target $warnings -Message ("target_tag_replanned: predicted={0} final={1}" -f [string]$targetTagRecord.tag, $targetTag) } From 8d8e469a27fac5e8333665d7a385ee0166c94de9 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Thu, 26 Feb 2026 23:47:52 -0800 Subject: [PATCH 45/60] Promote race drill to enforcement gate with incident lifecycle --- .github/workflows/integration-gate.yml | 3 +- .../release-race-hardening-drill.yml | 143 ++++++++++++++++++ AGENTS.md | 7 +- README.md | 13 +- .../runbooks/release-ops-incident-response.md | 17 +++ .../IntegrationGateWorkflowContract.Tests.ps1 | 3 +- ...ceHardeningDrillWorkflowContract.Tests.ps1 | 14 +- tests/ScopeAOpsRunbookContract.Tests.ps1 | 6 + tests/WorkspaceSurfaceContract.Tests.ps1 | 4 + 9 files changed, 205 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration-gate.yml b/.github/workflows/integration-gate.yml index f956b6c..139b322 100644 --- a/.github/workflows/integration-gate.yml +++ b/.github/workflows/integration-gate.yml @@ -50,7 +50,8 @@ jobs: 'CI Pipeline', 'Workspace Installer Contract', 'Reproducibility Contract', - 'Provenance Contract' + 'Provenance Contract', + 'Release Race Hardening Drill' ) $pollSeconds = 20 diff --git a/.github/workflows/release-race-hardening-drill.yml b/.github/workflows/release-race-hardening-drill.yml index 6080ae6..f3ea405 100644 --- a/.github/workflows/release-race-hardening-drill.yml +++ b/.github/workflows/release-race-hardening-drill.yml @@ -1,6 +1,9 @@ name: release-race-hardening-drill on: + push: + branches: + - integration/** schedule: - cron: '40 9 * * 2' workflow_dispatch: @@ -24,6 
+27,7 @@ on: permissions: contents: read actions: write + issues: write jobs: release-race-hardening-drill: @@ -91,3 +95,142 @@ jobs: name: release-race-hardening-drill-report-${{ github.run_id }} path: ${{ runner.temp }}/release-race-hardening-drill-report.json if-no-files-found: error + + - name: Write release race-hardening weekly summary + if: always() + shell: pwsh + env: + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release race-hardening drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $summaryPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-weekly-summary.json' + $summary = [ordered]@{ + schema_version = '1.0' + generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + repository = '${{ github.repository }}' + run_id = '${{ github.run_id }}' + run_url = $env:RUN_URL + workflow = 'release-race-hardening-drill.yml' + status = [string]$report.status + reason_code = [string]$report.reason_code + collision_observed = [bool]$report.evidence.collision_observed + collision_retries = [int]$report.evidence.collision_retries + collision_signals = @($report.evidence.collision_signals | ForEach-Object { [string]$_ }) + predicted_target_tag = [string]$report.evidence.predicted_target_tag + final_target_tag = [string]$report.evidence.final_target_tag + dispatch_status = [string]$report.evidence.dispatch_status + release_verification_status = [string]$report.evidence.release_verification_status + control_plane_report_artifact = [string]$report.artifacts.control_plane_report_artifact + } + $summary | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $summaryPath -Encoding utf8 + + if (-not 
[string]::IsNullOrWhiteSpace([string]$env:GITHUB_STEP_SUMMARY)) { + $collisionSignals = @($summary.collision_signals) + $collisionSignalsText = if ($collisionSignals.Count -gt 0) { [string]::Join(', ', $collisionSignals) } else { 'none' } + @( + '## Release Race Hardening Drill Summary', + "", + "- Run: $env:RUN_URL", + "- Status: $($summary.status)", + "- Reason code: $($summary.reason_code)", + "- Collision observed: $($summary.collision_observed)", + "- Collision retries: $($summary.collision_retries)", + "- Collision signals: $collisionSignalsText", + "- Predicted target tag: $($summary.predicted_target_tag)", + "- Final target tag: $($summary.final_target_tag)", + "- Release verification status: $($summary.release_verification_status)" + ) | Out-File -FilePath $env:GITHUB_STEP_SUMMARY -Append -Encoding utf8 + } + + - name: Upload release race-hardening weekly summary + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-race-hardening-weekly-summary-${{ github.run_id }} + path: ${{ runner.temp }}/release-race-hardening-weekly-summary.json + if-no-files-found: error + + - name: Update release race-hardening incident issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Race Hardening Drill Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release race-hardening drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $collisionSignals = @($report.evidence.collision_signals | ForEach-Object { [string]$_ }) + $collisionSignalsText = if ($collisionSignals.Count -gt 0) { [string]::Join(',', $collisionSignals) } else { 'none' } + $body 
= @" + Release race-hardening drill failed. + + - Run: $env:RUN_URL + - Branch: $($report.branch) + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Predicted target tag: $($report.evidence.predicted_target_tag) + - Final target tag: $($report.evidence.final_target_tag) + - Collision observed: $($report.evidence.collision_observed) + - Collision retries: $($report.evidence.collision_retries) + - Collision signals: $collisionSignalsText + - Control-plane report artifact: $($report.artifacts.control_plane_report_artifact) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close release race-hardening incident issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Race Hardening Drill Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release race-hardening drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $collisionSignals = @($report.evidence.collision_signals | ForEach-Object { [string]$_ }) + $collisionSignalsText = if ($collisionSignals.Count -gt 0) { [string]::Join(',', $collisionSignals) } else { 'none' } + $body = @" + Release race-hardening drill recovered. 
+ + - Run: $env:RUN_URL + - Branch: $($report.branch) + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Predicted target tag: $($report.evidence.predicted_target_tag) + - Final target tag: $($report.evidence.final_target_tag) + - Collision observed: $($report.evidence.collision_observed) + - Collision retries: $($report.evidence.collision_retries) + - Collision signals: $collisionSignalsText + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/AGENTS.md b/AGENTS.md index 9e881b6..4d6254a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -302,7 +302,12 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `rollback_assets_missing` - `rollback_drill_runtime_error` - `.github/workflows/release-race-hardening-drill.yml` must run `scripts/Invoke-ReleaseRaceHardeningDrill.ps1`. +- Race-hardening drill workflow must run on weekly schedule and on `integration/*` push events so release PR lanes receive deterministic collision-proof status. - Race-hardening drill must dispatch both `release-workspace-installer.yml` (contender) and `release-control-plane.yml` (`mode=CanaryCycle`, `dry_run=false`) and validate collision handling using control-plane artifact evidence. +- Race-hardening drill workflow must publish both: + - `release-race-hardening-drill-report.json` + - `release-race-hardening-weekly-summary.json` +- Race-hardening drill workflow must manage incident lifecycle through `scripts/Invoke-OpsIncidentLifecycle.ps1` with title `Release Race Hardening Drill Alert`. 
- Race-hardening drill reason codes must remain explicit: - `drill_passed` - `control_plane_collision_not_observed` @@ -314,7 +319,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref ## Integration Gate Policy - `.github/workflows/integration-gate.yml` is the integration-branch aggregator workflow. -- It must gate on required contexts: `CI Pipeline`, `Workspace Installer Contract`, `Reproducibility Contract`, `Provenance Contract`. +- It must gate on required contexts: `CI Pipeline`, `Workspace Installer Contract`, `Reproducibility Contract`, `Provenance Contract`, `Release Race Hardening Drill`. - Keep this as a distinct check context (`Integration Gate`) for branch-protection phase-in after promotion criteria are met. ## Installer Harness Execution Contract diff --git a/README.md b/README.md index c0b711b..2164a71 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ It polls commit statuses and only passes when these contexts are successful: - `Workspace Installer Contract` - `Reproducibility Contract` - `Provenance Contract` +- `Release Race Hardening Drill` ## Installer harness (self-hosted) @@ -466,7 +467,12 @@ Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` still em - `rollback_candidate_missing` - `rollback_assets_missing` -`release-race-hardening-drill.yml` is scheduled weekly and supports manual dispatch. 
It runs `scripts/Invoke-ReleaseRaceHardeningDrill.ps1` to prove release-tag collision handling under parallel dispatch pressure: +`release-race-hardening-drill.yml` runs on: +- weekly schedule +- `push` to `integration/*` (release PR enforcement lane) +- manual dispatch + +It runs `scripts/Invoke-ReleaseRaceHardeningDrill.ps1` to prove release-tag collision handling under parallel dispatch pressure: - dispatches a contender `release-workspace-installer.yml` run at predicted next SemVer canary tag - dispatches `release-control-plane.yml` in `CanaryCycle` mode immediately after - watches both runs and downloads `release-control-plane-report-` artifact @@ -478,6 +484,11 @@ Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` still em - `control_plane_report_missing` - `control_plane_run_failed` +Operational behavior: +- uploads `release-race-hardening-drill-report.json` +- emits weekly-review artifact `release-race-hardening-weekly-summary.json` +- uses incident lifecycle automation (`Invoke-OpsIncidentLifecycle.ps1`) with issue title `Release Race Hardening Drill Alert` on failure/recovery + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 56eb788..3d42dcb 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -268,11 +268,28 @@ Deterministic drill failure reason codes: - `control_plane_report_download_failed` - `control_plane_report_missing` - `control_plane_run_failed` +- incident title on failure/recovery: `Release Race Hardening Drill Alert` + +Weekly summary artifact review: + +```powershell +gh run list -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + --workflow release-race-hardening-drill.yml ` + --limit 1 + +gh run download ` + -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -n 
release-race-hardening-weekly-summary- ` + -D .\tmp-race-hardening-summary + +Get-Content .\tmp-race-hardening-summary\release-race-hardening-weekly-summary.json -Raw +``` ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` - `release-control-plane-override-audit.json` (when override is requested/applied) - `release-race-hardening-drill-report.json` +- `release-race-hardening-weekly-summary.json` - sync guard run URL - parity SHAs (upstream and fork) diff --git a/tests/IntegrationGateWorkflowContract.Tests.ps1 b/tests/IntegrationGateWorkflowContract.Tests.ps1 index 6a19ad1..7376291 100644 --- a/tests/IntegrationGateWorkflowContract.Tests.ps1 +++ b/tests/IntegrationGateWorkflowContract.Tests.ps1 @@ -26,7 +26,8 @@ Describe 'Integration gate workflow contract' { 'CI Pipeline', 'Workspace Installer Contract', 'Reproducibility Contract', - 'Provenance Contract' + 'Provenance Contract', + 'Release Race Hardening Drill' )) { $script:workflowContent | Should -Match ([regex]::Escape($context)) } diff --git a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 index 7d0553b..1e0963d 100644 --- a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 @@ -21,19 +21,23 @@ Describe 'Release race-hardening drill workflow contract' { It 'is scheduled and dispatchable with bounded drill controls' { $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'integration/\*\*' $script:workflowContent | Should -Match 'workflow_dispatch:' $script:workflowContent | Should -Match 'auto_remediate' $script:workflowContent | Should -Match 'keep_latest_canary_n' $script:workflowContent | Should -Match 'watch_timeout_minutes' } - It 'runs on hosted runner, executes drill runtime, and uploads report artifact' { + It 'runs on 
hosted runner, executes drill runtime, and uploads drill + weekly summary artifacts' { $script:workflowContent | Should -Match 'runs-on:\s*ubuntu-latest' $script:workflowContent | Should -Match 'Enforce hosted-runner lock' $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' $script:workflowContent | Should -Match 'hosted_runner_required' $script:workflowContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' $script:workflowContent | Should -Match 'release-race-hardening-drill-report\.json' + $script:workflowContent | Should -Match 'release-race-hardening-weekly-summary\.json' + $script:workflowContent | Should -Match 'Upload release race-hardening weekly summary' $script:workflowContent | Should -Match 'actions:\s*write' } @@ -58,4 +62,12 @@ Describe 'Release race-hardening drill workflow contract' { $script:runtimeContent | Should -Match '-canary\.' $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' } + + It 'manages incident lifecycle for drill failures and recoveries' { + $script:workflowContent | Should -Match 'Release Race Hardening Drill Alert' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + $script:workflowContent | Should -Match 'issues:\s*write' + } } diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index f6fd103..b459cbe 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -37,6 +37,8 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'release_verification_failed' $script:runbookContent | Should -Match 'control_plane_collision_not_observed' $script:runbookContent | Should -Match 'drill_passed' + $script:runbookContent | Should -Match 'Release Race Hardening Drill Alert' + $script:runbookContent | Should -Match 
'release-race-hardening-weekly-summary\.json' $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'stable_window_override_invalid' $script:runbookContent | Should -Match 'release-manifest\.json' @@ -54,6 +56,8 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' + $script:readmeContent | Should -Match 'Release Race Hardening Drill' + $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' @@ -67,6 +71,8 @@ Describe 'Scope A ops runbook contract' { $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' + $script:agentsContent | Should -Match 'Release Race Hardening Drill' + $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 3eae4b0..cfb23c9 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -397,6 +397,8 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | 
Should -Match 'release-race-hardening-drill\.yml' + $script:agentsContent | Should -Match 'Release Race Hardening Drill Alert' + $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' @@ -436,6 +438,8 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' + $script:readmeContent | Should -Match 'Release Race Hardening Drill' + $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' From 237a86ad92e5c993a87ed0b917624d0c84ca4dff Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:06:54 -0800 Subject: [PATCH 46/60] Add race-hardening gate and branch-protection drift controls --- .../branch-protection-drift-check.yml | 109 ++++++ .github/workflows/ci.yml | 2 + .github/workflows/integration-gate.yml | 59 ++- .../release-race-hardening-drill.yml | 3 - .../workflows/release-race-hardening-gate.yml | 72 ++++ AGENTS.md | 8 +- README.md | 24 +- .../runbooks/release-ops-incident-response.md | 53 +++ scripts/Set-ReleaseBranchProtectionPolicy.ps1 | 347 ++++++++++++++++++ .../Test-ReleaseBranchProtectionPolicy.ps1 | 215 +++++++++++ scripts/Test-ReleaseRaceHardeningGate.ps1 | 203 ++++++++++ ...hProtectionDriftWorkflowContract.Tests.ps1 | 66 ++++ .../IntegrationGateWorkflowContract.Tests.ps1 | 8 +- ...ceHardeningDrillWorkflowContract.Tests.ps1 | 2 - 
...aceHardeningGateWorkflowContract.Tests.ps1 | 52 +++ tests/ScopeAOpsRunbookContract.Tests.ps1 | 17 + tests/WorkspaceSurfaceContract.Tests.ps1 | 23 ++ 17 files changed, 1241 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/branch-protection-drift-check.yml create mode 100644 .github/workflows/release-race-hardening-gate.yml create mode 100644 scripts/Set-ReleaseBranchProtectionPolicy.ps1 create mode 100644 scripts/Test-ReleaseBranchProtectionPolicy.ps1 create mode 100644 scripts/Test-ReleaseRaceHardeningGate.ps1 create mode 100644 tests/BranchProtectionDriftWorkflowContract.Tests.ps1 create mode 100644 tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1 diff --git a/.github/workflows/branch-protection-drift-check.yml b/.github/workflows/branch-protection-drift-check.yml new file mode 100644 index 0000000..3d9856e --- /dev/null +++ b/.github/workflows/branch-protection-drift-check.yml @@ -0,0 +1,109 @@ +name: branch-protection-drift-check + +on: + schedule: + - cron: '10 * * * *' + push: + branches: + - main + workflow_dispatch: + +permissions: + contents: read + actions: read + issues: write + +jobs: + branch-protection-drift-check: + name: Branch Protection Drift Check + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Verify release branch-protection policy + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'branch-protection-drift-report.json' + & pwsh -NoProfile -File ./scripts/Test-ReleaseBranchProtectionPolicy.ps1 ` + -Repository '${{ github.repository }}' ` + -OutputPath $reportPath + + - name: Upload branch-protection drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: branch-protection-drift-report-${{ github.run_id }} + path: ${{ runner.temp }}/branch-protection-drift-report.json + if-no-files-found: error + + - name: Update branch-protection drift incident on failure + if: 
failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Branch Protection Drift Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'branch-protection-drift-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "Branch-protection drift report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Branch-protection drift check failed. + + - Run: $env:RUN_URL + - Repository: $($report.repository) + - Reason codes: $reasonCodeText + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close branch-protection drift incident on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Branch Protection Drift Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'branch-protection-drift-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "Branch-protection drift report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Branch-protection 
drift check recovered. + + - Run: $env:RUN_URL + - Repository: $($report.repository) + - Reason codes: $reasonCodeText + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bdb36f..76e9d8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,7 +83,9 @@ jobs: './tests/OpsAutoRemediationWorkflowContract.Tests.ps1', './tests/OpsSloGateWorkflowContract.Tests.ps1', './tests/OpsPolicyDriftWorkflowContract.Tests.ps1', + './tests/BranchProtectionDriftWorkflowContract.Tests.ps1', './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', + './tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1', './tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1', './tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1', './tests/CanarySmokeTagHygieneWorkflowContract.Tests.ps1', diff --git a/.github/workflows/integration-gate.yml b/.github/workflows/integration-gate.yml index 139b322..b0cb095 100644 --- a/.github/workflows/integration-gate.yml +++ b/.github/workflows/integration-gate.yml @@ -3,6 +3,11 @@ name: Integration Gate on: push: branches: + - main + - integration/** + pull_request: + branches: + - main - integration/** workflow_dispatch: inputs: @@ -26,6 +31,8 @@ jobs: TARGET_REPOSITORY: ${{ github.repository }} INPUT_REF: ${{ inputs.ref }} TRIGGER_SHA: ${{ github.sha }} + EVENT_NAME: ${{ github.event_name }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | $ErrorActionPreference = 'Stop' @@ -37,7 +44,11 @@ jobs: $targetRef = [string]$env:INPUT_REF $sha = '' if ([string]::IsNullOrWhiteSpace($targetRef)) { - $sha = ([string]$env:TRIGGER_SHA).Trim().ToLowerInvariant() + if ([string]$env:EVENT_NAME -eq 'pull_request') { + $sha = ([string]$env:PR_HEAD_SHA).Trim().ToLowerInvariant() + } else { + $sha = 
([string]$env:TRIGGER_SHA).Trim().ToLowerInvariant() + } } else { $sha = (& gh api "repos/$repo/commits/$targetRef" --jq '.sha').Trim().ToLowerInvariant() } @@ -59,20 +70,31 @@ jobs: $deadline = (Get-Date).ToUniversalTime().AddSeconds($timeoutSeconds) while ((Get-Date).ToUniversalTime() -lt $deadline) { - $statusJson = & gh api "repos/$repo/commits/$sha/status" + $checksJson = & gh api "repos/$repo/commits/$sha/check-runs?per_page=100" if ($LASTEXITCODE -ne 0) { - throw "Failed to read commit status for '$repo' @ '$sha'." + throw "Failed to read commit check-runs for '$repo' @ '$sha'." } - $statusObject = $statusJson | ConvertFrom-Json -ErrorAction Stop + $checksObject = $checksJson | ConvertFrom-Json -ErrorAction Stop $latestByContext = @{} - foreach ($status in @($statusObject.statuses)) { - $context = [string]$status.context + $orderedChecks = @( + @($checksObject.check_runs) | + Sort-Object -Property @{ Expression = { + if ([string]::IsNullOrWhiteSpace([string]$_.started_at)) { + [DateTimeOffset]::MinValue + } else { + [DateTimeOffset]::Parse([string]$_.started_at).ToUniversalTime() + } + }; Descending = $true } + ) + + foreach ($check in @($orderedChecks)) { + $context = [string]$check.name if ([string]::IsNullOrWhiteSpace($context)) { continue } if (-not $latestByContext.ContainsKey($context)) { - $latestByContext[$context] = $status + $latestByContext[$context] = $check } } @@ -86,13 +108,26 @@ jobs: continue } - $state = [string]$latestByContext[$context].state - $stateSummary += "$context=$state" - switch ($state) { + $check = $latestByContext[$context] + $status = [string]$check.status + $conclusion = [string]$check.conclusion + $stateSummary += "$context=$status/$conclusion" + + if ($status -ne 'completed') { + $allResolved = $false + continue + } + + switch ($conclusion) { 'success' {} + 'neutral' {} + 'skipped' {} 'failure' { $hasFailure = $true } - 'error' { $hasFailure = $true } - 'pending' { $allResolved = $false } + 'cancelled' { $hasFailure = 
$true } + 'timed_out' { $hasFailure = $true } + 'action_required' { $hasFailure = $true } + 'startup_failure' { $hasFailure = $true } + 'stale' { $hasFailure = $true } default { $allResolved = $false } } } diff --git a/.github/workflows/release-race-hardening-drill.yml b/.github/workflows/release-race-hardening-drill.yml index f3ea405..e79a442 100644 --- a/.github/workflows/release-race-hardening-drill.yml +++ b/.github/workflows/release-race-hardening-drill.yml @@ -1,9 +1,6 @@ name: release-race-hardening-drill on: - push: - branches: - - integration/** schedule: - cron: '40 9 * * 2' workflow_dispatch: diff --git a/.github/workflows/release-race-hardening-gate.yml b/.github/workflows/release-race-hardening-gate.yml new file mode 100644 index 0000000..2ecea7a --- /dev/null +++ b/.github/workflows/release-race-hardening-gate.yml @@ -0,0 +1,72 @@ +name: release-race-hardening-gate + +on: + push: + branches: + - main + - integration/** + pull_request: + branches: + - main + - integration/** + workflow_dispatch: + inputs: + source_branch: + description: Branch to evaluate latest successful drill runs from. + required: false + default: main + type: string + max_age_hours: + description: Maximum age of latest successful drill run. 
+ required: false + default: '168' + type: string + +permissions: + contents: read + actions: read + +jobs: + release-race-hardening-gate: + name: Release Race Hardening Drill + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Evaluate race-hardening drill gate + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-race-hardening-gate-report.json' + + $sourceBranch = [string]'${{ inputs.source_branch }}' + if ([string]::IsNullOrWhiteSpace($sourceBranch)) { + $sourceBranch = 'main' + } + + $maxAgeHoursText = [string]'${{ inputs.max_age_hours }}' + $maxAgeHours = 168 + if (-not [string]::IsNullOrWhiteSpace($maxAgeHoursText)) { + $parsed = 0 + if (-not [int]::TryParse($maxAgeHoursText, [ref]$parsed)) { + throw "max_age_hours must be an integer. actual='$maxAgeHoursText'" + } + $maxAgeHours = $parsed + } + + & pwsh -NoProfile -File ./scripts/Test-ReleaseRaceHardeningGate.ps1 ` + -Repository '${{ github.repository }}' ` + -SourceBranch $sourceBranch ` + -MaxAgeHours $maxAgeHours ` + -OutputPath $reportPath + + - name: Upload release race-hardening gate report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-race-hardening-gate-report-${{ github.run_id }} + path: ${{ runner.temp }}/release-race-hardening-gate-report.json + if-no-files-found: error diff --git a/AGENTS.md b/AGENTS.md index 4d6254a..970d223 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -302,12 +302,16 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `rollback_assets_missing` - `rollback_drill_runtime_error` - `.github/workflows/release-race-hardening-drill.yml` must run `scripts/Invoke-ReleaseRaceHardeningDrill.ps1`. -- Race-hardening drill workflow must run on weekly schedule and on `integration/*` push events so release PR lanes receive deterministic collision-proof status. 
+- Race-hardening drill workflow is release-lane evidence generation and must run on weekly schedule + manual dispatch only. - Race-hardening drill must dispatch both `release-workspace-installer.yml` (contender) and `release-control-plane.yml` (`mode=CanaryCycle`, `dry_run=false`) and validate collision handling using control-plane artifact evidence. - Race-hardening drill workflow must publish both: - `release-race-hardening-drill-report.json` - `release-race-hardening-weekly-summary.json` - Race-hardening drill workflow must manage incident lifecycle through `scripts/Invoke-OpsIncidentLifecycle.ps1` with title `Release Race Hardening Drill Alert`. +- `.github/workflows/release-race-hardening-gate.yml` must run `scripts/Test-ReleaseRaceHardeningGate.ps1` and provide required check context `Release Race Hardening Drill` for `main` + `integration/*` PR/push lanes. +- Race-hardening gate must fail when latest successful drill evidence is missing/stale, `reason_code != drill_passed`, or collision evidence is absent. +- `.github/workflows/branch-protection-drift-check.yml` must run `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and maintain incident lifecycle title `Branch Protection Drift Alert`. +- `scripts/Set-ReleaseBranchProtectionPolicy.ps1` is the deterministic apply path for required-check drift repair. - Race-hardening drill reason codes must remain explicit: - `drill_passed` - `control_plane_collision_not_observed` @@ -319,7 +323,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref ## Integration Gate Policy - `.github/workflows/integration-gate.yml` is the integration-branch aggregator workflow. +- It must run on `push` + `pull_request` for `main` and `integration/*` (plus dispatch). - It must gate on required contexts: `CI Pipeline`, `Workspace Installer Contract`, `Reproducibility Contract`, `Provenance Contract`, `Release Race Hardening Drill`. 
+- It must evaluate commit check-runs (not legacy commit status contexts) and treat `success`, `neutral`, and `skipped` as pass states. - Keep this as a distinct check context (`Integration Gate`) for branch-protection phase-in after promotion criteria are met. ## Installer Harness Execution Contract diff --git a/README.md b/README.md index 2164a71..20defb9 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,12 @@ Additional supply-chain contract jobs: ## Integration gate -`integration-gate.yml` provides a single `Integration Gate` context for `integration/*` branches (and manual dispatch). -It polls commit statuses and only passes when these contexts are successful: +`integration-gate.yml` provides a single `Integration Gate` context for: +- `push` to `main` and `integration/*` +- `pull_request` targeting `main` and `integration/*` +- manual dispatch + +It polls commit check-runs and only passes when these contexts are successful (or intentionally skipped): - `CI Pipeline` - `Workspace Installer Contract` - `Reproducibility Contract` @@ -469,7 +473,6 @@ Underlying rollback evaluator `scripts/Invoke-ReleaseRollbackDrill.ps1` still em `release-race-hardening-drill.yml` runs on: - weekly schedule -- `push` to `integration/*` (release PR enforcement lane) - manual dispatch It runs `scripts/Invoke-ReleaseRaceHardeningDrill.ps1` to prove release-tag collision handling under parallel dispatch pressure: @@ -489,6 +492,21 @@ Operational behavior: - emits weekly-review artifact `release-race-hardening-weekly-summary.json` - uses incident lifecycle automation (`Invoke-OpsIncidentLifecycle.ps1`) with issue title `Release Race Hardening Drill Alert` on failure/recovery +`release-race-hardening-gate.yml` provides the required branch-protection context (`Release Race Hardening Drill`) for: +- `push` to `main` and `integration/*` +- `pull_request` targeting `main` and `integration/*` + +It runs `scripts/Test-ReleaseRaceHardeningGate.ps1` and fails when: +- no recent successful 
drill run exists +- latest drill report is missing or not `reason_code=drill_passed` +- latest drill report does not include collision evidence + +`branch-protection-drift-check.yml` continuously validates release branch-protection policy via `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and reports drift for: +- `main` +- `integration/*` + +Use `scripts/Set-ReleaseBranchProtectionPolicy.ps1` to deterministically apply/repair required check contracts. + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 3d42dcb..73a95fa 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -285,11 +285,64 @@ gh run download ` Get-Content .\tmp-race-hardening-summary\release-race-hardening-weekly-summary.json -Raw ``` +## Release Race-Hardening Gate Verification +This gate provides required check context `Release Race Hardening Drill` for `main` and `integration/*` PR/push lanes. 
+ +Manual gate dispatch: + +```powershell +gh workflow run release-race-hardening-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f source_branch=main ` + -f max_age_hours=168 +``` + +Local gate check: + +```powershell +Set-Location D:\dev\labview-cdev-surface-fork +pwsh -File .\scripts\Test-ReleaseRaceHardeningGate.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -SourceBranch main ` + -MaxAgeHours 168 +``` + +Expected gate failure reason codes include: +- `drill_run_missing` +- `drill_run_stale` +- `drill_reason_code_invalid` +- `drill_collision_evidence_missing` + +## Branch Protection Drift + Apply +Continuous drift monitor: + +```powershell +gh workflow run branch-protection-drift-check.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +Local policy verify: + +```powershell +pwsh -File .\scripts\Test-ReleaseBranchProtectionPolicy.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +Deterministic apply/repair: + +```powershell +pwsh -File .\scripts\Set-ReleaseBranchProtectionPolicy.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +Branch-protection drift incident title: +- `Branch Protection Drift Alert` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` - `release-control-plane-override-audit.json` (when override is requested/applied) - `release-race-hardening-drill-report.json` - `release-race-hardening-weekly-summary.json` +- `release-race-hardening-gate-report.json` +- `branch-protection-drift-report.json` - sync guard run URL - parity SHAs (upstream and fork) diff --git a/scripts/Set-ReleaseBranchProtectionPolicy.ps1 b/scripts/Set-ReleaseBranchProtectionPolicy.ps1 new file mode 100644 index 0000000..2bb86c1 --- /dev/null +++ b/scripts/Set-ReleaseBranchProtectionPolicy.ps1 @@ -0,0 +1,347 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + 
[ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$MainPattern = 'main', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/*-]+$')] + [string]$IntegrationPattern = 'integration/*', + + [Parameter()] + [Alias('MainRequiredContext')] + [string[]]$MainRequiredContexts = @( + 'CI Pipeline', + 'Integration Gate', + 'Release Race Hardening Drill' + ), + + [Parameter()] + [Alias('IntegrationRequiredContext')] + [string[]]$IntegrationRequiredContexts = @( + 'CI Pipeline', + 'Integration Gate', + 'Release Race Hardening Drill' + ), + + [Parameter()] + [switch]$DryRun, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +# Appends $ReasonCode to $Target unless it is already present. +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +# Runs a GraphQL query through Invoke-GhJson, passing each entry of $Variables as a -F field. +function Invoke-GraphQl { + param( + [Parameter(Mandatory = $true)][string]$Query, + [Parameter(Mandatory = $true)][hashtable]$Variables + ) + + # Local is not named $args: that is an automatic variable inside functions. + $ghArguments = @('api', 'graphql', '-f', ("query={0}" -f $Query)) + foreach ($key in $Variables.Keys) { + $ghArguments += @('-F', ("{0}={1}" -f $key, [string]$Variables[$key])) + } + + return (Invoke-GhJson -Arguments $ghArguments) +} + +# Runs a GraphQL mutation through Invoke-GhJson with the input object serialized to compact JSON. +function Invoke-GraphQlMutationWithInput { + param( + [Parameter(Mandatory = $true)][string]$Mutation, + # Bound as $InputObject because $Input is an automatic variable inside functions; the 'Input' alias keeps existing -Input callers working. + [Parameter(Mandatory = $true)][Alias('Input')]$InputObject + ) + + # NOTE(review): '-f input=...' sends the JSON as a raw string field; confirm the API accepts that for an input-object variable. + $inputJson = $InputObject | ConvertTo-Json -Depth 50 -Compress + return (Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $Mutation), + '-f', ("input={0}" -f $inputJson) + )) +} + +function Resolve-ExistingRules { + param( + [Parameter(Mandatory = $true)][string]$Owner, + 
[Parameter(Mandatory = $true)][string]$Name + ) + + $query = @' +query($owner:String!, $name:String!) { + repository(owner:$owner, name:$name) { + id + branchProtectionRules(first:100) { + nodes { + id + pattern + requiresStatusChecks + requiresStrictStatusChecks + requiredStatusCheckContexts + allowsForcePushes + allowsDeletions + } + } + } +} +'@ + return (Invoke-GraphQl -Query $query -Variables @{ + owner = $Owner + name = $Name + }) +} + +function New-DesiredRuleSpec { + param( + [Parameter(Mandatory = $true)][string]$Pattern, + [Parameter(Mandatory = $true)][string[]]$Contexts + ) + + return [ordered]@{ + pattern = $Pattern + requiresStatusChecks = $true + requiresStrictStatusChecks = $true + requiredStatusCheckContexts = @($Contexts) + allowsForcePushes = $false + allowsDeletions = $false + } +} + +function Test-RuleMatchesSpec { + param( + [Parameter()][AllowNull()]$Rule, + [Parameter(Mandatory = $true)]$Spec + ) + + if ($null -eq $Rule) { + return $false + } + + if ([bool]$Rule.requiresStatusChecks -ne [bool]$Spec.requiresStatusChecks) { + return $false + } + if ([bool]$Rule.requiresStrictStatusChecks -ne [bool]$Spec.requiresStrictStatusChecks) { + return $false + } + if ([bool]$Rule.allowsForcePushes -ne [bool]$Spec.allowsForcePushes) { + return $false + } + if ([bool]$Rule.allowsDeletions -ne [bool]$Spec.allowsDeletions) { + return $false + } + + $actual = @($Rule.requiredStatusCheckContexts | ForEach-Object { [string]$_ }) + $expected = @($Spec.requiredStatusCheckContexts | ForEach-Object { [string]$_ }) + foreach ($ctx in $expected) { + if ($actual -notcontains $ctx) { + return $false + } + } + + return $true +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + dry_run = [bool]$DryRun + status = 'fail' + reason_codes = @() + message = '' + actions = @() +} + +try { + $repoParts = $Repository.Split('/') + if ($repoParts.Count 
-ne 2) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'repository_invalid' + throw "Repository slug is invalid: $Repository" + } + + $owner = [string]$repoParts[0] + $name = [string]$repoParts[1] + $existingPayload = Resolve-ExistingRules -Owner $owner -Name $name + $repositoryNode = $existingPayload.data.repository + if ($null -eq $repositoryNode -or [string]::IsNullOrWhiteSpace([string]$repositoryNode.id)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'repository_not_found' + throw "Repository GraphQL node not found: $Repository" + } + $repositoryId = [string]$repositoryNode.id + $existingRules = @($repositoryNode.branchProtectionRules.nodes) + + # Each call is parenthesized so the comma builds a two-element array; a bare comma in argument mode would extend -Contexts into the next command and bind -Pattern twice. + $desired = @( + (New-DesiredRuleSpec -Pattern $MainPattern -Contexts @($MainRequiredContexts)), + (New-DesiredRuleSpec -Pattern $IntegrationPattern -Contexts @($IntegrationRequiredContexts)) + ) + + $actionRecords = [System.Collections.Generic.List[object]]::new() + + foreach ($spec in @($desired)) { + $current = @($existingRules | Where-Object { [string]$_.pattern -eq [string]$spec.pattern } | Select-Object -First 1) + $currentRule = if (@($current).Count -eq 1) { $current[0] } else { $null } + # Named $ruleMatches so the automatic $Matches variable is not overwritten. + $ruleMatches = Test-RuleMatchesSpec -Rule $currentRule -Spec $spec + if ($ruleMatches) { + [void]$actionRecords.Add([ordered]@{ + pattern = [string]$spec.pattern + action = 'noop' + rule_id = if ($null -eq $currentRule) { '' } else { [string]$currentRule.id } + }) + continue + } + + if ($DryRun) { + [void]$actionRecords.Add([ordered]@{ + pattern = [string]$spec.pattern + action = if ($null -eq $currentRule) { 'create_dry_run' } else { 'update_dry_run' } + rule_id = if ($null -eq $currentRule) { '' } else { [string]$currentRule.id } + desired = $spec + }) + continue + } + + if ($null -eq $currentRule) { + $createMutation = @' +mutation($input:CreateBranchProtectionRuleInput!) 
{ + createBranchProtectionRule(input:$input) { + branchProtectionRule { + id + pattern + requiresStatusChecks + requiresStrictStatusChecks + requiredStatusCheckContexts + allowsForcePushes + allowsDeletions + } + } +} +'@ + $createInput = [ordered]@{ + repositoryId = $repositoryId + pattern = [string]$spec.pattern + requiresStatusChecks = [bool]$spec.requiresStatusChecks + requiresStrictStatusChecks = [bool]$spec.requiresStrictStatusChecks + requiredStatusCheckContexts = @($spec.requiredStatusCheckContexts) + allowsForcePushes = [bool]$spec.allowsForcePushes + allowsDeletions = [bool]$spec.allowsDeletions + } + + $createResult = Invoke-GraphQlMutationWithInput -Mutation $createMutation -Input $createInput + $createdRule = $createResult.data.createBranchProtectionRule.branchProtectionRule + [void]$actionRecords.Add([ordered]@{ + pattern = [string]$spec.pattern + action = 'created' + rule_id = [string]$createdRule.id + }) + } else { + $updateMutation = @' +mutation($input:UpdateBranchProtectionRuleInput!) 
{ + updateBranchProtectionRule(input:$input) { + branchProtectionRule { + id + pattern + requiresStatusChecks + requiresStrictStatusChecks + requiredStatusCheckContexts + allowsForcePushes + allowsDeletions + } + } +} +'@ + $updateInput = [ordered]@{ + branchProtectionRuleId = [string]$currentRule.id + pattern = [string]$spec.pattern + requiresStatusChecks = [bool]$spec.requiresStatusChecks + requiresStrictStatusChecks = [bool]$spec.requiresStrictStatusChecks + requiredStatusCheckContexts = @($spec.requiredStatusCheckContexts) + allowsForcePushes = [bool]$spec.allowsForcePushes + allowsDeletions = [bool]$spec.allowsDeletions + } + + $updateResult = Invoke-GraphQlMutationWithInput -Mutation $updateMutation -Input $updateInput + $updatedRule = $updateResult.data.updateBranchProtectionRule.branchProtectionRule + [void]$actionRecords.Add([ordered]@{ + pattern = [string]$spec.pattern + action = 'updated' + rule_id = [string]$updatedRule.id + }) + } + } + + $report.actions = @($actionRecords) + + if (-not $DryRun) { + $verifyScript = Join-Path $PSScriptRoot 'Test-ReleaseBranchProtectionPolicy.ps1' + if (-not (Test-Path -LiteralPath $verifyScript -PathType Leaf)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'verify_script_missing' + throw "Verification script missing: $verifyScript" + } + + $verifyPath = Join-Path ([System.IO.Path]::GetTempPath()) ("branch-protection-verify-" + [Guid]::NewGuid().ToString('N') + '.json') + & pwsh -NoProfile -File $verifyScript ` + -Repository $Repository ` + -MainPattern $MainPattern ` + -IntegrationPattern $IntegrationPattern ` + -MainRequiredContexts @($MainRequiredContexts) ` + -IntegrationRequiredContexts @($IntegrationRequiredContexts) ` + -OutputPath $verifyPath | Out-Null + $verifyExit = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($verifyExit -ne 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'verification_failed' + if (Test-Path -LiteralPath $verifyPath -PathType Leaf) { + $verifyReport = 
Get-Content -LiteralPath $verifyPath -Raw | ConvertFrom-Json -Depth 100 + throw ("Post-apply verification failed. reason_codes={0}" -f [string]::Join(',', @($verifyReport.reason_codes | ForEach-Object { [string]$_ }))) + } + throw 'Post-apply verification failed without report payload.' + } + } + + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = if ($DryRun) { @('dry_run') } else { @('applied') } + $report.message = if ($DryRun) { 'Release branch-protection apply dry-run completed.' } else { 'Release branch-protection policy applied and verified.' } + } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Release branch-protection apply failed. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + if ($reasonCodes.Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'apply_runtime_error' + } + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-ReleaseBranchProtectionPolicy.ps1 b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 new file mode 100644 index 0000000..4f668fa --- /dev/null +++ b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 @@ -0,0 +1,215 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$MainPattern = 'main', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/*-]+$')] + [string]$IntegrationPattern = 'integration/*', + + [Parameter()] + [Alias('MainRequiredContext')] + [string[]]$MainRequiredContexts = @( + 'CI Pipeline', + 'Integration Gate', + 'Release Race Hardening Drill' + ), + + 
[Parameter()] + [Alias('IntegrationRequiredContext')] + [string[]]$IntegrationRequiredContexts = @( + 'CI Pipeline', + 'Integration Gate', + 'Release Race Hardening Drill' + ), + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +# Appends $ReasonCode to $Target unless it is already present. +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +# Checks one branch-protection rule against the expected policy and returns an ordered record with 'status' and 'issues'. +function Test-RuleContract { + param( + # Nullable and non-mandatory so a missing rule reaches the 'rule_missing' branch below instead of failing parameter binding. + [Parameter()][AllowNull()]$Rule, + [Parameter(Mandatory = $true)][string]$Pattern, + [Parameter(Mandatory = $true)][string[]]$RequiredContexts + ) + + $issues = [System.Collections.Generic.List[string]]::new() + if ($null -eq $Rule) { + [void]$issues.Add('rule_missing') + return [ordered]@{ + status = 'fail' + issues = @($issues) + actual_contexts = @() + requires_status_checks = $false + requires_strict_status_checks = $false + allows_force_pushes = $false + allows_deletions = $false + } + } + + if (-not [bool]$Rule.requiresStatusChecks) { + [void]$issues.Add('requires_status_checks_false') + } + if (-not [bool]$Rule.requiresStrictStatusChecks) { + [void]$issues.Add('requires_strict_status_checks_false') + } + if ([bool]$Rule.allowsForcePushes) { + [void]$issues.Add('allows_force_pushes_true') + } + if ([bool]$Rule.allowsDeletions) { + [void]$issues.Add('allows_deletions_true') + } + + # Every required context must be present; extra contexts on the rule are not reported. + $actualContexts = @($Rule.requiredStatusCheckContexts | ForEach-Object { [string]$_ }) + foreach ($required in @($RequiredContexts)) { + if ($actualContexts -notcontains [string]$required) { + [void]$issues.Add("missing_context:$required") + } + } + + return [ordered]@{ + status = if ($issues.Count -eq 0) { 'pass' } else { 'fail' } + issues = @($issues) + actual_contexts = @($actualContexts) + 
requires_status_checks = [bool]$Rule.requiresStatusChecks + requires_strict_status_checks = [bool]$Rule.requiresStrictStatusChecks + allows_force_pushes = [bool]$Rule.allowsForcePushes + allows_deletions = [bool]$Rule.allowsDeletions + } +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + status = 'fail' + reason_codes = @() + message = '' + expected = [ordered]@{ + main_pattern = $MainPattern + integration_pattern = $IntegrationPattern + main_required_contexts = @($MainRequiredContexts) + integration_required_contexts = @($IntegrationRequiredContexts) + } + actual = [ordered]@{ + main_rule = $null + integration_rule = $null + } +} + +try { + $repoParts = $Repository.Split('/') + if ($repoParts.Count -ne 2) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'repository_invalid' + throw "Repository slug is invalid: $Repository" + } + + $owner = [string]$repoParts[0] + $name = [string]$repoParts[1] + $query = @' +query($owner:String!, $name:String!) 
{ + repository(owner:$owner, name:$name) { + branchProtectionRules(first:100) { + nodes { + id + pattern + requiresStatusChecks + requiresStrictStatusChecks + requiredStatusCheckContexts + allowsForcePushes + allowsDeletions + } + } + } +} +'@ + $result = Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $query), + '-F', ("owner={0}" -f $owner), + '-F', ("name={0}" -f $name) + ) + + $rules = @($result.data.repository.branchProtectionRules.nodes) + $mainRule = @($rules | Where-Object { [string]$_.pattern -eq $MainPattern } | Select-Object -First 1) + $integrationRule = @($rules | Where-Object { [string]$_.pattern -eq $IntegrationPattern } | Select-Object -First 1) + + $mainCheck = Test-RuleContract -Rule ($mainRule | Select-Object -First 1) -Pattern $MainPattern -RequiredContexts @($MainRequiredContexts) + $integrationCheck = Test-RuleContract -Rule ($integrationRule | Select-Object -First 1) -Pattern $IntegrationPattern -RequiredContexts @($IntegrationRequiredContexts) + + $report.actual.main_rule = [ordered]@{ + pattern = $MainPattern + check = $mainCheck + } + $report.actual.integration_rule = [ordered]@{ + pattern = $IntegrationPattern + check = $integrationCheck + } + + if ([string]$mainCheck.status -ne 'pass') { + if (@($mainCheck.issues) -contains 'rule_missing') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'main_rule_missing' + } else { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'main_rule_mismatch' + } + } + if ([string]$integrationCheck.status -ne 'pass') { + if (@($integrationCheck.issues) -contains 'rule_missing') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'integration_rule_missing' + } else { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'integration_rule_mismatch' + } + } + + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Release branch-protection policy is satisfied.' 
+ } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Release branch-protection policy drift detected. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + if ($reasonCodes.Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'branch_protection_query_failed' + } + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-ReleaseRaceHardeningGate.ps1 b/scripts/Test-ReleaseRaceHardeningGate.ps1 new file mode 100644 index 0000000..ef49d97 --- /dev/null +++ b/scripts/Test-ReleaseRaceHardeningGate.ps1 @@ -0,0 +1,203 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$SourceBranch = 'main', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$WorkflowFile = 'release-race-hardening-drill.yml', + + [Parameter()] + [ValidateRange(1, 720)] + [int]$MaxAgeHours = 168, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +function Try-ParseUtcDateTimeOffset { + param( + [Parameter()][AllowNull()]$Value + ) + + $parsed = [DateTimeOffset]::MinValue + if ($null -eq $Value) { + return $null + } + + $text = [string]$Value + if ([string]::IsNullOrWhiteSpace($text)) { + return $null + } + + if ([DateTimeOffset]::TryParse($text, [ref]$parsed)) { + return $parsed.ToUniversalTime() + } + + return $null +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() +$warnings = [System.Collections.Generic.List[string]]::new() + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + source_branch = $SourceBranch + workflow_file = $WorkflowFile + max_age_hours = $MaxAgeHours + status = 'fail' + reason_codes = @() + message = '' + latest_successful_run = $null + drill_report = $null +} + +try { + $runs = @( + Get-GhWorkflowRunsPortable ` + -Repository $Repository ` + -Workflow $WorkflowFile ` + -Branch $SourceBranch ` + -Limit 50 + ) + + $successfulRuns = @( + $runs | + Where-Object { + [string]$_.status -eq 'completed' -and + [string]$_.conclusion -eq 'success' + } | + Sort-Object { Parse-RunTimestamp -Run $_ } -Descending + ) + + if (@($successfulRuns).Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_run_missing' + throw 'No successful race-hardening drill runs were found.' 
+ } + + $latestRun = $successfulRuns[0] + $latestRunCreatedAt = Parse-RunTimestamp -Run $latestRun + $maxAge = [TimeSpan]::FromHours([double]$MaxAgeHours) + $runAge = [DateTimeOffset]::UtcNow - $latestRunCreatedAt + if ($runAge -gt $maxAge) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_run_stale' + throw ("Latest successful race-hardening run is stale. run_id={0} age_hours={1}" -f [string]$latestRun.databaseId, [Math]::Round($runAge.TotalHours, 2)) + } + + $report.latest_successful_run = [ordered]@{ + run_id = [string]$latestRun.databaseId + status = [string]$latestRun.status + conclusion = [string]$latestRun.conclusion + created_at_utc = [string]$latestRun.createdAt + url = [string]$latestRun.url + age_hours = [Math]::Round($runAge.TotalHours, 2) + } + + $artifactName = "release-race-hardening-drill-report-$([string]$latestRun.databaseId)" + $downloadRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("race-hardening-gate-" + [Guid]::NewGuid().ToString('N')) + New-Item -Path $downloadRoot -ItemType Directory -Force | Out-Null + + try { + & gh run download ([string]$latestRun.databaseId) -R $Repository -n $artifactName -D $downloadRoot + $downloadExit = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($downloadExit -ne 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_report_download_failed' + throw ("Unable to download drill report artifact. run_id={0} artifact={1} exit_code={2}" -f [string]$latestRun.databaseId, $artifactName, $downloadExit) + } + + $reportPath = @( + Get-ChildItem -Path $downloadRoot -Recurse -File -Filter 'release-race-hardening-drill-report.json' | + Select-Object -First 1 -ExpandProperty FullName + ) + if (@($reportPath).Count -ne 1 -or [string]::IsNullOrWhiteSpace([string]$reportPath[0])) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_report_missing' + throw ("Downloaded artifact is missing release-race-hardening-drill-report.json. 
run_id={0}" -f [string]$latestRun.databaseId) + } + + $drillReport = Get-Content -LiteralPath ([string]$reportPath[0]) -Raw | ConvertFrom-Json -Depth 100 + $collisionSignals = @($drillReport.evidence.collision_signals | ForEach-Object { [string]$_ }) + $collisionRetries = 0 + [void][int]::TryParse([string]$drillReport.evidence.collision_retries, [ref]$collisionRetries) + $collisionObserved = [bool]$drillReport.evidence.collision_observed + + $report.drill_report = [ordered]@{ + status = [string]$drillReport.status + reason_code = [string]$drillReport.reason_code + message = [string]$drillReport.message + collision_observed = $collisionObserved + collision_retries = $collisionRetries + collision_signals = @($collisionSignals) + release_verification_status = [string]$drillReport.evidence.release_verification_status + source_run_url = [string]$latestRun.url + } + + if ([string]$drillReport.status -ne 'pass' -or [string]$drillReport.reason_code -ne 'drill_passed') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_reason_code_invalid' + throw ("Latest drill report is not passing. status={0} reason_code={1}" -f [string]$drillReport.status, [string]$drillReport.reason_code) + } + + if (-not $collisionObserved -or ($collisionRetries -lt 1 -and @($collisionSignals).Count -eq 0)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_collision_evidence_missing' + throw 'Latest drill report does not include required collision evidence.' + } + + if ([string]$drillReport.evidence.release_verification_status -ne 'pass') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_release_verification_missing' + throw ("Latest drill report release verification is not pass. 
status={0}" -f [string]$drillReport.evidence.release_verification_status) + } + } + finally { + if (Test-Path -LiteralPath $downloadRoot -PathType Container) { + Remove-Item -LiteralPath $downloadRoot -Recurse -Force -ErrorAction SilentlyContinue + } + } + + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Release race-hardening gate passed.' +} +catch { + if ($reasonCodes.Count -eq 0) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'drill_gate_runtime_error' + } + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = [string]$_.Exception.Message +} +finally { + $report.generated_at_utc = Get-UtcNowIso + if ($warnings.Count -gt 0) { + $report.warnings = @($warnings) + } + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..cf87fd4 --- /dev/null +++ b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 @@ -0,0 +1,66 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Branch protection drift workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/branch-protection-drift-check.yml' + $script:verifyPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseBranchProtectionPolicy.ps1' + $script:applyPath = Join-Path $script:repoRoot 'scripts/Set-ReleaseBranchProtectionPolicy.ps1' + + foreach ($path in @($script:workflowPath, $script:verifyPath, $script:applyPath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Branch-protection drift contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + 
$script:verifyContent = Get-Content -LiteralPath $script:verifyPath -Raw + $script:applyContent = Get-Content -LiteralPath $script:applyPath -Raw + } + + It 'runs on schedule, main push, and manual dispatch' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'main' + $script:workflowContent | Should -Match 'workflow_dispatch:' + } + + It 'verifies policy and publishes a machine-readable drift report' { + $script:workflowContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' + $script:workflowContent | Should -Match 'branch-protection-drift-report\.json' + $script:workflowContent | Should -Match 'Branch Protection Drift Check' + } + + It 'manages failure and recovery incidents for branch-protection drift' { + $script:workflowContent | Should -Match 'Branch Protection Drift Alert' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + $script:workflowContent | Should -Match 'issues:\s*write' + } + + It 'defines release branch-protection policy contract for main and integration lanes' { + $script:verifyContent | Should -Match 'main' + $script:verifyContent | Should -Match 'integration/\*' + $script:verifyContent | Should -Match 'CI Pipeline' + $script:verifyContent | Should -Match 'Integration Gate' + $script:verifyContent | Should -Match 'Release Race Hardening Drill' + $script:verifyContent | Should -Match 'main_rule_missing' + $script:verifyContent | Should -Match 'integration_rule_missing' + $script:verifyContent | Should -Match 'branch_protection_query_failed' + } + + It 'supports deterministic apply and verification of branch-protection policy' { + $script:applyContent | Should -Match 'CreateBranchProtectionRuleInput' + $script:applyContent | Should -Match 'UpdateBranchProtectionRuleInput' + $script:applyContent | Should 
-Match 'Test-ReleaseBranchProtectionPolicy\.ps1' + $script:applyContent | Should -Match 'reason_codes = if \(\$DryRun\)' + $script:applyContent | Should -Match "@\('dry_run'\)" + $script:applyContent | Should -Match "@\('applied'\)" + $script:applyContent | Should -Match 'verification_failed' + $script:applyContent | Should -Match 'apply_runtime_error' + } +} diff --git a/tests/IntegrationGateWorkflowContract.Tests.ps1 b/tests/IntegrationGateWorkflowContract.Tests.ps1 index 7376291..6cfd87b 100644 --- a/tests/IntegrationGateWorkflowContract.Tests.ps1 +++ b/tests/IntegrationGateWorkflowContract.Tests.ps1 @@ -16,7 +16,9 @@ Describe 'Integration gate workflow contract' { It 'runs on integration branch pushes and on demand' { $script:workflowContent | Should -Match 'name:\s*Integration Gate' $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'main' $script:workflowContent | Should -Match 'integration/\*\*' + $script:workflowContent | Should -Match 'pull_request:' $script:workflowContent | Should -Match 'workflow_dispatch:' $script:workflowContent | Should -Match 'ref:' } @@ -31,7 +33,11 @@ Describe 'Integration gate workflow contract' { )) { $script:workflowContent | Should -Match ([regex]::Escape($context)) } - $script:workflowContent | Should -Match 'repos/\$repo/commits/\$sha/status' + $script:workflowContent | Should -Match 'repos/\$repo/commits/\$sha/check-runs' + $script:workflowContent | Should -Match 'pull_request' + $script:workflowContent | Should -Match 'PR_HEAD_SHA' + $script:workflowContent | Should -Match 'neutral' + $script:workflowContent | Should -Match 'skipped' $script:workflowContent | Should -Match 'Start-Sleep -Seconds' $script:workflowContent | Should -Match 'Integration gate passed' $script:workflowContent | Should -Match 'Integration gate timed out' diff --git a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 index 1e0963d..2b03a3b 
100644 --- a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 @@ -21,8 +21,6 @@ Describe 'Release race-hardening drill workflow contract' { It 'is scheduled and dispatchable with bounded drill controls' { $script:workflowContent | Should -Match 'schedule:' - $script:workflowContent | Should -Match 'push:' - $script:workflowContent | Should -Match 'integration/\*\*' $script:workflowContent | Should -Match 'workflow_dispatch:' $script:workflowContent | Should -Match 'auto_remediate' $script:workflowContent | Should -Match 'keep_latest_canary_n' diff --git a/tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..6d538ec --- /dev/null +++ b/tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1 @@ -0,0 +1,52 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release race-hardening gate workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-race-hardening-gate.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Test-ReleaseRaceHardeningGate.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Release race-hardening gate contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'runs on main and integration PR/push plus manual dispatch' { + $script:workflowContent | Should -Match 'push:' + $script:workflowContent | Should -Match 'pull_request:' + $script:workflowContent | Should -Match 'main' + $script:workflowContent | Should -Match 'integration/\*\*' + 
$script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'source_branch' + $script:workflowContent | Should -Match 'max_age_hours' + } + + It 'exposes required check context and uploads gate report artifact' { + $script:workflowContent | Should -Match 'name:\s*Release Race Hardening Drill' + $script:workflowContent | Should -Match 'runs-on:\s*ubuntu-latest' + $script:workflowContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:workflowContent | Should -Match 'release-race-hardening-gate-report\.json' + } + + It 'validates latest successful drill report reason code and collision evidence' { + $script:runtimeContent | Should -Match 'Get-GhWorkflowRunsPortable' + $script:runtimeContent | Should -Match 'drill_run_missing' + $script:runtimeContent | Should -Match 'drill_run_stale' + $script:runtimeContent | Should -Match 'drill_report_download_failed' + $script:runtimeContent | Should -Match 'drill_reason_code_invalid' + $script:runtimeContent | Should -Match 'drill_collision_evidence_missing' + $script:runtimeContent | Should -Match 'drill_release_verification_missing' + $script:runtimeContent | Should -Match 'drill_gate_runtime_error' + $script:runtimeContent | Should -Match "reason_codes = @\('ok'\)" + $script:runtimeContent | Should -Match 'drill_passed' + $script:runtimeContent | Should -Match 'gh run download' + } +} diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index b459cbe..4b45b95 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -31,13 +31,19 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'ops-policy-drift-check\.yml' $script:runbookContent | Should -Match 'release-rollback-drill\.yml' $script:runbookContent | Should -Match 'release-race-hardening-drill\.yml' + $script:runbookContent | Should -Match 'release-race-hardening-gate\.yml' + 
$script:runbookContent | Should -Match 'branch-protection-drift-check\.yml' $script:runbookContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:runbookContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:runbookContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:runbookContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:runbookContent | Should -Match 'auto_self_heal=false' $script:runbookContent | Should -Match '20260226' $script:runbookContent | Should -Match 'release_verification_failed' $script:runbookContent | Should -Match 'control_plane_collision_not_observed' $script:runbookContent | Should -Match 'drill_passed' $script:runbookContent | Should -Match 'Release Race Hardening Drill Alert' + $script:runbookContent | Should -Match 'Branch Protection Drift Alert' $script:runbookContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'stable_window_override_invalid' @@ -56,11 +62,16 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' + $script:readmeContent | Should -Match 'release-race-hardening-gate\.yml' + $script:readmeContent | Should -Match 'branch-protection-drift-check\.yml' $script:readmeContent | Should -Match 'Release Race Hardening Drill' $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:readmeContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:readmeContent | Should 
-Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:readmeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:readmeContent | Should -Match 'release-ops-incident-response\.md' $script:agentsContent | Should -Match 'Ops Monitoring Policy' @@ -71,10 +82,16 @@ Describe 'Scope A ops runbook contract' { $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' + $script:agentsContent | Should -Match 'release-race-hardening-gate\.yml' + $script:agentsContent | Should -Match 'branch-protection-drift-check\.yml' $script:agentsContent | Should -Match 'Release Race Hardening Drill' + $script:agentsContent | Should -Match 'Branch Protection Drift Alert' $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:agentsContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:agentsContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:agentsContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' } } diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index cfb23c9..e5be053 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -37,6 +37,9 @@ Describe 'Workspace surface contract' { $script:rollbackDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' $script:rollbackSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' $script:raceHardeningDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRaceHardeningDrill.ps1' + 
$script:raceHardeningGateScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseRaceHardeningGate.ps1' + $script:releaseBranchProtectionPolicyScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseBranchProtectionPolicy.ps1' + $script:setReleaseBranchProtectionPolicyScriptPath = Join-Path $script:repoRoot 'scripts/Set-ReleaseBranchProtectionPolicy.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' $script:windowsContainerNsisSelfTestScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-WindowsContainerNsisSelfTest.ps1' $script:windowsContainerNsisDockerfilePath = Join-Path $script:repoRoot 'tools/nsis-selftest-windows/Dockerfile' @@ -54,8 +57,10 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' $script:opsSloGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' $script:opsPolicyDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' + $script:branchProtectionDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/branch-protection-drift-check.yml' $script:rollbackDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' $script:raceHardeningDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-race-hardening-drill.yml' + $script:raceHardeningGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-race-hardening-gate.yml' $script:linuxNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-linux-nsis-parity-image.yml' $script:windowsNsisParityImagePublishWorkflowPath = Join-Path $script:repoRoot '.github/workflows/publish-windows-nsis-parity-image.yml' $script:windowsImageGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/windows-labview-image-gate.yml' @@ -106,6 +111,9 @@ Describe 
'Workspace surface contract' { $script:rollbackDrillScriptPath, $script:rollbackSelfHealingScriptPath, $script:raceHardeningDrillScriptPath, + $script:raceHardeningGateScriptPath, + $script:releaseBranchProtectionPolicyScriptPath, + $script:setReleaseBranchProtectionPolicyScriptPath, $script:dockerLinuxIterationScriptPath, $script:windowsContainerNsisSelfTestScriptPath, $script:windowsContainerNsisDockerfilePath, @@ -123,8 +131,10 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath, $script:opsSloGateWorkflowPath, $script:opsPolicyDriftWorkflowPath, + $script:branchProtectionDriftWorkflowPath, $script:rollbackDrillWorkflowPath, $script:raceHardeningDrillWorkflowPath, + $script:raceHardeningGateWorkflowPath, $script:linuxNsisParityImagePublishWorkflowPath, $script:windowsNsisParityImagePublishWorkflowPath, $script:windowsImageGateWorkflowPath, @@ -397,11 +407,17 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' + $script:agentsContent | Should -Match 'release-race-hardening-gate\.yml' + $script:agentsContent | Should -Match 'branch-protection-drift-check\.yml' $script:agentsContent | Should -Match 'Release Race Hardening Drill Alert' + $script:agentsContent | Should -Match 'Branch Protection Drift Alert' $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:agentsContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:agentsContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:agentsContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' 
$script:agentsContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:agentsContent | Should -Match 'workflow_failure_detected' $script:agentsContent | Should -Match 'release_client_drift' @@ -438,11 +454,16 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' + $script:readmeContent | Should -Match 'release-race-hardening-gate\.yml' + $script:readmeContent | Should -Match 'branch-protection-drift-check\.yml' $script:readmeContent | Should -Match 'Release Race Hardening Drill' $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:readmeContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:readmeContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:readmeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:readmeContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:readmeContent | Should -Match 'workflow_failure_detected' $script:readmeContent | Should -Match 'release_client_drift' @@ -491,6 +512,8 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'OpsSloGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'OpsPolicyDriftWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'ReleaseRaceHardeningDrillWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'ReleaseRaceHardeningGateWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'BranchProtectionDriftWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should 
-Match 'ReleaseRollbackDrillWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxLabviewImageGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'LinuxContainerNsisParityContract\.Tests\.ps1' From f5feb3d8fb55b50a6c2bcd39aecb4d6937726563 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:13:00 -0800 Subject: [PATCH 47/60] Fix branch-protection apply/verify runtime edge cases --- scripts/Set-ReleaseBranchProtectionPolicy.ps1 | 119 +++++++++++------- .../Test-ReleaseBranchProtectionPolicy.ps1 | 38 +++++- ...hProtectionDriftWorkflowContract.Tests.ps1 | 4 +- 3 files changed, 112 insertions(+), 49 deletions(-) diff --git a/scripts/Set-ReleaseBranchProtectionPolicy.ps1 b/scripts/Set-ReleaseBranchProtectionPolicy.ps1 index 2bb86c1..b8b9e56 100644 --- a/scripts/Set-ReleaseBranchProtectionPolicy.ps1 +++ b/scripts/Set-ReleaseBranchProtectionPolicy.ps1 @@ -66,18 +66,32 @@ function Invoke-GraphQl { return (Invoke-GhJson -Arguments $args) } -function Invoke-GraphQlMutationWithInput { - param( - [Parameter(Mandatory = $true)][string]$Mutation, - [Parameter(Mandatory = $true)]$Input - ) +function ConvertTo-GraphQlStringLiteral { + param([Parameter(Mandatory = $true)][string]$Value) + + $escaped = $Value.Replace('\', '\\').Replace('"', '\"').Replace("`r", '\r').Replace("`n", '\n') + return '"' + $escaped + '"' +} + +function ConvertTo-GraphQlBooleanLiteral { + param([Parameter(Mandatory = $true)][bool]$Value) - $inputJson = $Input | ConvertTo-Json -Depth 50 -Compress - return (Invoke-GhJson -Arguments @( - 'api', 'graphql', - '-f', ("query={0}" -f $Mutation), - '-f', ("input={0}" -f $inputJson) - )) + if ($Value) { + return 'true' + } + + return 'false' +} + +function ConvertTo-GraphQlStringArrayLiteral { + param([Parameter(Mandatory = $true)][string[]]$Values) + + if (@($Values).Count -eq 0) { + return '[]' + } + + $encoded = @($Values | ForEach-Object { ConvertTo-GraphQlStringLiteral -Value ([string]$_) }) + 
return ('[' + ([string]::Join(',', $encoded)) + ']') } function Resolve-ExistingRules { @@ -192,8 +206,8 @@ try { $existingRules = @($repositoryNode.branchProtectionRules.nodes) $desired = @( - New-DesiredRuleSpec -Pattern $MainPattern -Contexts @($MainRequiredContexts), - New-DesiredRuleSpec -Pattern $IntegrationPattern -Contexts @($IntegrationRequiredContexts) + (New-DesiredRuleSpec -Pattern $MainPattern -Contexts @($MainRequiredContexts)) + (New-DesiredRuleSpec -Pattern $IntegrationPattern -Contexts @($IntegrationRequiredContexts)) ) $actionRecords = [System.Collections.Generic.List[object]]::new() @@ -221,10 +235,28 @@ try { continue } + $repositoryIdLiteral = ConvertTo-GraphQlStringLiteral -Value $repositoryId + $patternLiteral = ConvertTo-GraphQlStringLiteral -Value ([string]$spec.pattern) + $requiresStatusChecksLiteral = ConvertTo-GraphQlBooleanLiteral -Value ([bool]$spec.requiresStatusChecks) + $requiresStrictStatusChecksLiteral = ConvertTo-GraphQlBooleanLiteral -Value ([bool]$spec.requiresStrictStatusChecks) + $requiredStatusCheckContextsLiteral = ConvertTo-GraphQlStringArrayLiteral -Values @($spec.requiredStatusCheckContexts | ForEach-Object { [string]$_ }) + $allowsForcePushesLiteral = ConvertTo-GraphQlBooleanLiteral -Value ([bool]$spec.allowsForcePushes) + $allowsDeletionsLiteral = ConvertTo-GraphQlBooleanLiteral -Value ([bool]$spec.allowsDeletions) + if ($null -eq $currentRule) { - $createMutation = @' -mutation($input:CreateBranchProtectionRuleInput!) 
{ - createBranchProtectionRule(input:$input) { + $createMutation = @" +mutation { + createBranchProtectionRule( + input: { + repositoryId: $repositoryIdLiteral + pattern: $patternLiteral + requiresStatusChecks: $requiresStatusChecksLiteral + requiresStrictStatusChecks: $requiresStrictStatusChecksLiteral + requiredStatusCheckContexts: $requiredStatusCheckContextsLiteral + allowsForcePushes: $allowsForcePushesLiteral + allowsDeletions: $allowsDeletionsLiteral + } + ) { branchProtectionRule { id pattern @@ -236,18 +268,12 @@ mutation($input:CreateBranchProtectionRuleInput!) { } } } -'@ - $createInput = [ordered]@{ - repositoryId = $repositoryId - pattern = [string]$spec.pattern - requiresStatusChecks = [bool]$spec.requiresStatusChecks - requiresStrictStatusChecks = [bool]$spec.requiresStrictStatusChecks - requiredStatusCheckContexts = @($spec.requiredStatusCheckContexts) - allowsForcePushes = [bool]$spec.allowsForcePushes - allowsDeletions = [bool]$spec.allowsDeletions - } +"@ - $createResult = Invoke-GraphQlMutationWithInput -Mutation $createMutation -Input $createInput + $createResult = Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $createMutation) + ) $createdRule = $createResult.data.createBranchProtectionRule.branchProtectionRule [void]$actionRecords.Add([ordered]@{ pattern = [string]$spec.pattern @@ -255,9 +281,20 @@ mutation($input:CreateBranchProtectionRuleInput!) { rule_id = [string]$createdRule.id }) } else { - $updateMutation = @' -mutation($input:UpdateBranchProtectionRuleInput!) 
{ - updateBranchProtectionRule(input:$input) { + $ruleIdLiteral = ConvertTo-GraphQlStringLiteral -Value ([string]$currentRule.id) + $updateMutation = @" +mutation { + updateBranchProtectionRule( + input: { + branchProtectionRuleId: $ruleIdLiteral + pattern: $patternLiteral + requiresStatusChecks: $requiresStatusChecksLiteral + requiresStrictStatusChecks: $requiresStrictStatusChecksLiteral + requiredStatusCheckContexts: $requiredStatusCheckContextsLiteral + allowsForcePushes: $allowsForcePushesLiteral + allowsDeletions: $allowsDeletionsLiteral + } + ) { branchProtectionRule { id pattern @@ -269,18 +306,12 @@ mutation($input:UpdateBranchProtectionRuleInput!) { } } } -'@ - $updateInput = [ordered]@{ - branchProtectionRuleId = [string]$currentRule.id - pattern = [string]$spec.pattern - requiresStatusChecks = [bool]$spec.requiresStatusChecks - requiresStrictStatusChecks = [bool]$spec.requiresStrictStatusChecks - requiredStatusCheckContexts = @($spec.requiredStatusCheckContexts) - allowsForcePushes = [bool]$spec.allowsForcePushes - allowsDeletions = [bool]$spec.allowsDeletions - } +"@ - $updateResult = Invoke-GraphQlMutationWithInput -Mutation $updateMutation -Input $updateInput + $updateResult = Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $updateMutation) + ) $updatedRule = $updateResult.data.updateBranchProtectionRule.branchProtectionRule [void]$actionRecords.Add([ordered]@{ pattern = [string]$spec.pattern @@ -300,12 +331,14 @@ mutation($input:UpdateBranchProtectionRuleInput!) 
{ } $verifyPath = Join-Path ([System.IO.Path]::GetTempPath()) ("branch-protection-verify-" + [Guid]::NewGuid().ToString('N') + '.json') + $mainContextsCsv = [string]::Join(',', @($MainRequiredContexts | ForEach-Object { [string]$_ })) + $integrationContextsCsv = [string]::Join(',', @($IntegrationRequiredContexts | ForEach-Object { [string]$_ })) & pwsh -NoProfile -File $verifyScript ` -Repository $Repository ` -MainPattern $MainPattern ` -IntegrationPattern $IntegrationPattern ` - -MainRequiredContexts @($MainRequiredContexts) ` - -IntegrationRequiredContexts @($IntegrationRequiredContexts) ` + -MainRequiredContexts $mainContextsCsv ` + -IntegrationRequiredContexts $integrationContextsCsv ` -OutputPath $verifyPath | Out-Null $verifyExit = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } if ($verifyExit -ne 0) { diff --git a/scripts/Test-ReleaseBranchProtectionPolicy.ps1 b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 index 4f668fa..569157c 100644 --- a/scripts/Test-ReleaseBranchProtectionPolicy.ps1 +++ b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 @@ -49,10 +49,37 @@ function Add-ReasonCode { } } +function Normalize-RequiredContexts { + param( + [Parameter()][string[]]$Values = @() + ) + + $normalized = [System.Collections.Generic.List[string]]::new() + foreach ($entry in @($Values)) { + $text = [string]$entry + if ([string]::IsNullOrWhiteSpace($text)) { + continue + } + + foreach ($segment in @($text.Split(','))) { + $candidate = [string]$segment + $candidate = $candidate.Trim() + if ([string]::IsNullOrWhiteSpace($candidate)) { + continue + } + + if (-not $normalized.Contains($candidate)) { + [void]$normalized.Add($candidate) + } + } + } + + return @($normalized) +} + function Test-RuleContract { param( - [Parameter(Mandatory = $true)]$Rule, - [Parameter(Mandatory = $true)][string]$Pattern, + [Parameter(Mandatory = $true)][AllowNull()]$Rule, [Parameter(Mandatory = $true)][string[]]$RequiredContexts ) @@ -103,6 +130,9 @@ function Test-RuleContract 
{ $reasonCodes = [System.Collections.Generic.List[string]]::new() +$MainRequiredContexts = Normalize-RequiredContexts -Values @($MainRequiredContexts) +$IntegrationRequiredContexts = Normalize-RequiredContexts -Values @($IntegrationRequiredContexts) + $report = [ordered]@{ schema_version = '1.0' generated_at_utc = Get-UtcNowIso @@ -159,8 +189,8 @@ query($owner:String!, $name:String!) { $mainRule = @($rules | Where-Object { [string]$_.pattern -eq $MainPattern } | Select-Object -First 1) $integrationRule = @($rules | Where-Object { [string]$_.pattern -eq $IntegrationPattern } | Select-Object -First 1) - $mainCheck = Test-RuleContract -Rule ($mainRule | Select-Object -First 1) -Pattern $MainPattern -RequiredContexts @($MainRequiredContexts) - $integrationCheck = Test-RuleContract -Rule ($integrationRule | Select-Object -First 1) -Pattern $IntegrationPattern -RequiredContexts @($IntegrationRequiredContexts) + $mainCheck = Test-RuleContract -Rule ($mainRule | Select-Object -First 1) -RequiredContexts @($MainRequiredContexts) + $integrationCheck = Test-RuleContract -Rule ($integrationRule | Select-Object -First 1) -RequiredContexts @($IntegrationRequiredContexts) $report.actual.main_rule = [ordered]@{ pattern = $MainPattern diff --git a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 index cf87fd4..cd146ea 100644 --- a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 +++ b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 @@ -54,8 +54,8 @@ Describe 'Branch protection drift workflow contract' { } It 'supports deterministic apply and verification of branch-protection policy' { - $script:applyContent | Should -Match 'CreateBranchProtectionRuleInput' - $script:applyContent | Should -Match 'UpdateBranchProtectionRuleInput' + $script:applyContent | Should -Match 'createBranchProtectionRule' + $script:applyContent | Should -Match 'updateBranchProtectionRule' $script:applyContent | Should -Match 
'Test-ReleaseBranchProtectionPolicy\.ps1' $script:applyContent | Should -Match 'reason_codes = if \(\$DryRun\)' $script:applyContent | Should -Match "@\('dry_run'\)" From d1d73f716221165b5105c0c81fb95d6ce63a976c Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:27:05 -0800 Subject: [PATCH 48/60] Add autonomous release guardrails self-healing lane --- .github/workflows/ci.yml | 1 + .../release-guardrails-autoremediate.yml | 167 ++++++ AGENTS.md | 16 + README.md | 23 + .../runbooks/release-ops-incident-response.md | 37 ++ .../Invoke-ReleaseGuardrailsSelfHealing.ps1 | 501 ++++++++++++++++++ scripts/Test-PolicyContracts.ps1 | 6 + scripts/Test-ReleaseClientContracts.ps1 | 6 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 9 + .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 1 + tests/ReleaseClientPolicyContract.Tests.ps1 | 7 + ...sAutoRemediationWorkflowContract.Tests.ps1 | 68 +++ tests/ScopeAOpsRunbookContract.Tests.ps1 | 9 + tests/WorkspaceSurfaceContract.Tests.ps1 | 17 + .../workspace-governance.json | 8 + workspace-governance.json | 8 + 16 files changed, 884 insertions(+) create mode 100644 .github/workflows/release-guardrails-autoremediate.yml create mode 100644 scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 create mode 100644 tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 76e9d8f..14edb19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,6 +83,7 @@ jobs: './tests/OpsAutoRemediationWorkflowContract.Tests.ps1', './tests/OpsSloGateWorkflowContract.Tests.ps1', './tests/OpsPolicyDriftWorkflowContract.Tests.ps1', + './tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1', './tests/BranchProtectionDriftWorkflowContract.Tests.ps1', './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', './tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1', diff --git a/.github/workflows/release-guardrails-autoremediate.yml 
b/.github/workflows/release-guardrails-autoremediate.yml new file mode 100644 index 0000000..760d07c --- /dev/null +++ b/.github/workflows/release-guardrails-autoremediate.yml @@ -0,0 +1,167 @@ +name: release-guardrails-autoremediate + +on: + schedule: + - cron: '25 * * * *' + workflow_dispatch: + inputs: + race_gate_max_age_hours: + description: Maximum age of latest successful release race-hardening drill run. + required: false + default: '168' + type: string + auto_self_heal: + description: Apply bounded autonomous remediation actions. + required: false + default: true + type: boolean + max_attempts: + description: Maximum remediation attempts. + required: false + default: '1' + type: string + drill_watch_timeout_minutes: + description: Timeout minutes while watching release race-hardening drill remediation run. + required: false + default: '120' + type: string + +permissions: + contents: read + actions: write + issues: write + +jobs: + release-guardrails-autoremediate: + name: Release Guardrails Auto-Remediation + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Execute release guardrails auto-remediation + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' + + $raceGateMaxAgeText = [string]'${{ inputs.race_gate_max_age_hours }}' + $raceGateMaxAgeHours = 168 + if (-not [string]::IsNullOrWhiteSpace($raceGateMaxAgeText)) { + $parsedRaceGateMaxAge = 0 + if (-not [int]::TryParse($raceGateMaxAgeText, [ref]$parsedRaceGateMaxAge)) { + throw "race_gate_max_age_hours must be an integer. 
actual='$raceGateMaxAgeText'" + } + $raceGateMaxAgeHours = $parsedRaceGateMaxAge + } + + $maxAttemptsText = [string]'${{ inputs.max_attempts }}' + $maxAttempts = 1 + if (-not [string]::IsNullOrWhiteSpace($maxAttemptsText)) { + $parsedMaxAttempts = 0 + if (-not [int]::TryParse($maxAttemptsText, [ref]$parsedMaxAttempts)) { + throw "max_attempts must be an integer. actual='$maxAttemptsText'" + } + $maxAttempts = $parsedMaxAttempts + } + + $watchTimeoutText = [string]'${{ inputs.drill_watch_timeout_minutes }}' + $watchTimeoutMinutes = 120 + if (-not [string]::IsNullOrWhiteSpace($watchTimeoutText)) { + $parsedWatchTimeout = 0 + if (-not [int]::TryParse($watchTimeoutText, [ref]$parsedWatchTimeout)) { + throw "drill_watch_timeout_minutes must be an integer. actual='$watchTimeoutText'" + } + $watchTimeoutMinutes = $parsedWatchTimeout + } + + $autoSelfHealText = [string]'${{ inputs.auto_self_heal }}' + $autoSelfHeal = $true + if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { + $autoSelfHeal = [System.Convert]::ToBoolean($autoSelfHealText) + } + + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 ` + -Repository '${{ github.repository }}' ` + -Branch 'main' ` + -RaceGateMaxAgeHours $raceGateMaxAgeHours ` + -AutoSelfHeal:$autoSelfHeal ` + -MaxAttempts $maxAttempts ` + -DrillWatchTimeoutMinutes $watchTimeoutMinutes ` + -OutputPath $reportPath + + - name: Upload release guardrails auto-remediation report + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-guardrails-autoremediate-report-${{ github.run_id }} + path: ${{ runner.temp }}/release-guardrails-autoremediate-report.json + if-no-files-found: error + + - name: Update release guardrails incident issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release 
Guardrails Auto-Remediation Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release guardrails report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Release guardrails auto-remediation failed. + + - Run: $env:RUN_URL + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Repository: $($report.repository) + - Branch: $($report.branch) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close release guardrails incident issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Release Guardrails Auto-Remediation Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "release guardrails report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $body = @" + Release guardrails auto-remediation recovered. 
+ + - Run: $env:RUN_URL + - Reason code: $($report.reason_code) + - Message: $($report.message) + - Repository: $($report.repository) + - Branch: $($report.branch) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/AGENTS.md b/AGENTS.md index 970d223..d7b50a8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -284,6 +284,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `runtime_images_missing` - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` + - `ops_control_plane_guardrails_missing` - `ops_control_plane_stable_window_missing` - `ops_control_plane_stable_window_reason_pattern_missing` - `ops_control_plane_stable_window_reason_example_missing` @@ -311,7 +312,22 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `.github/workflows/release-race-hardening-gate.yml` must run `scripts/Test-ReleaseRaceHardeningGate.ps1` and provide required check context `Release Race Hardening Drill` for `main` + `integration/*` PR/push lanes. - Race-hardening gate must fail when latest successful drill evidence is missing/stale, `reason_code != drill_passed`, or collision evidence is absent. - `.github/workflows/branch-protection-drift-check.yml` must run `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and maintain incident lifecycle title `Branch Protection Drift Alert`. +- `.github/workflows/release-guardrails-autoremediate.yml` must run `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` and maintain incident lifecycle title `Release Guardrails Auto-Remediation Alert`. - `scripts/Set-ReleaseBranchProtectionPolicy.ps1` is the deterministic apply path for required-check drift repair. 
+- Guardrails self-healing policy must remain explicit under `ops_control_plane_policy.self_healing.guardrails`: + - `remediation_workflow` + - `race_drill_workflow` + - `watch_timeout_minutes` + - `verify_after_remediation` + - `race_gate_max_age_hours` +- Guardrails self-healing reason codes must remain explicit: + - `already_healthy` + - `remediated` + - `auto_remediation_disabled` + - `no_automatable_action` + - `remediation_execution_failed` + - `remediation_verify_failed` + - `guardrails_self_heal_runtime_error` - Race-hardening drill reason codes must remain explicit: - `drill_passed` - `control_plane_collision_not_observed` diff --git a/README.md b/README.md index 20defb9..d958577 100644 --- a/README.md +++ b/README.md @@ -453,6 +453,7 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `runtime_images_missing` - `ops_control_plane_policy_missing` - `ops_control_plane_self_healing_missing` + - `ops_control_plane_guardrails_missing` - `ops_control_plane_stable_window_missing` - `ops_control_plane_stable_window_reason_pattern_missing` - `ops_control_plane_stable_window_reason_example_missing` @@ -507,6 +508,28 @@ It runs `scripts/Test-ReleaseRaceHardeningGate.ps1` and fails when: Use `scripts/Set-ReleaseBranchProtectionPolicy.ps1` to deterministically apply/repair required check contracts. +`release-guardrails-autoremediate.yml` is scheduled hourly and supports manual dispatch. 
It runs `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` to: +- evaluate branch-protection drift and release race-hardening freshness in one pass +- auto-apply branch-protection policy via `Set-ReleaseBranchProtectionPolicy.ps1` when mismatch/missing rules are detected +- auto-dispatch `release-race-hardening-drill.yml` when drill freshness is missing or stale, then re-verify gate health +- fail with deterministic reason codes: + - `already_healthy` + - `remediated` + - `auto_remediation_disabled` + - `no_automatable_action` + - `remediation_execution_failed` + - `remediation_verify_failed` + - `guardrails_self_heal_runtime_error` + +Guardrails policy is codified in `installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails`: +- `remediation_workflow` +- `race_drill_workflow` +- `watch_timeout_minutes` +- `verify_after_remediation` +- `race_gate_max_age_hours` + +Incident lifecycle title for this lane is `Release Guardrails Auto-Remediation Alert`. + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 73a95fa..2bc7c3a 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -336,6 +336,42 @@ pwsh -File .\scripts\Set-ReleaseBranchProtectionPolicy.ps1 ` Branch-protection drift incident title: - `Branch Protection Drift Alert` +## Release Guardrails Auto-Remediation +Dispatch autonomous guardrails remediation: + +```powershell +gh workflow run release-guardrails-autoremediate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -f race_gate_max_age_hours=168 ` + -f auto_self_heal=true ` + -f max_attempts=1 ` + -f drill_watch_timeout_minutes=120 +``` + +Run the same remediation path locally: + +```powershell +Set-Location D:\dev\labview-cdev-surface-fork +pwsh -File 
.\scripts\Invoke-ReleaseGuardrailsSelfHealing.ps1 ` + -Repository LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` + -Branch main ` + -RaceGateMaxAgeHours 168 ` + -AutoSelfHeal:$true ` + -MaxAttempts 1 ` + -DrillWatchTimeoutMinutes 120 +``` + +Deterministic guardrails reason codes: +- `already_healthy` +- `remediated` +- `auto_remediation_disabled` +- `no_automatable_action` +- `remediation_execution_failed` +- `remediation_verify_failed` +- `guardrails_self_heal_runtime_error` + +Guardrails incident title: +- `Release Guardrails Auto-Remediation Alert` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` @@ -344,5 +380,6 @@ Branch-protection drift incident title: - `release-race-hardening-weekly-summary.json` - `release-race-hardening-gate-report.json` - `branch-protection-drift-report.json` +- `release-guardrails-autoremediate-report.json` - sync guard run URL - parity SHAs (upstream and fork) diff --git a/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 b/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 new file mode 100644 index 0000000..cc40226 --- /dev/null +++ b/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 @@ -0,0 +1,501 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$Branch = 'main', + + [Parameter()] + [ValidatePattern('^[A-Za-z0-9._/-]+$')] + [string]$DrillWorkflow = 'release-race-hardening-drill.yml', + + [Parameter()] + [ValidateRange(1, 720)] + [int]$RaceGateMaxAgeHours = 168, + + [Parameter()] + [bool]$AutoSelfHeal = $true, + + [Parameter()] + [ValidateRange(1, 5)] + [int]$MaxAttempts = 1, + + [Parameter()] + [ValidateRange(5, 240)] + [int]$DrillWatchTimeoutMinutes = 120, + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest 
+$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +$branchPolicyScript = Join-Path $PSScriptRoot 'Test-ReleaseBranchProtectionPolicy.ps1' +$setBranchPolicyScript = Join-Path $PSScriptRoot 'Set-ReleaseBranchProtectionPolicy.ps1' +$raceGateScript = Join-Path $PSScriptRoot 'Test-ReleaseRaceHardeningGate.ps1' +$dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' +$watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' + +foreach ($requiredScript in @($branchPolicyScript, $setBranchPolicyScript, $raceGateScript, $dispatchWorkflowScript, $watchWorkflowScript)) { + if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { + throw "required_script_missing: $requiredScript" + } +} + +function ConvertTo-StringArray { + param([Parameter()][AllowNull()]$Value) + + if ($null -eq $Value) { + return @() + } + + if ($Value -is [string]) { + if ([string]::IsNullOrWhiteSpace([string]$Value)) { + return @() + } + return @([string]$Value) + } + + $items = [System.Collections.Generic.List[string]]::new() + foreach ($entry in @($Value)) { + $text = [string]$entry + if ([string]::IsNullOrWhiteSpace($text)) { + continue + } + if (-not $items.Contains($text)) { + [void]$items.Add($text) + } + } + + return @($items) +} + +function Get-PropertyValueOrDefault { + param( + [Parameter()][AllowNull()]$Object, + [Parameter(Mandatory = $true)][string]$Name, + [Parameter()][AllowNull()]$DefaultValue = $null + ) + + if ($null -eq $Object) { + return $DefaultValue + } + + $property = $Object.PSObject.Properties[$Name] + if ($null -eq $property) { + return $DefaultValue + } + + return $property.Value +} + +function Get-ReasonCodesFromReport { + param([Parameter()][AllowNull()]$Report) + + return @( + ConvertTo-StringArray -Value (Get-PropertyValueOrDefault -Object $Report -Name 'reason_codes' -DefaultValue @()) + ) +} + +function Format-ReasonCodeSet { + param([Parameter()][string[]]$ReasonCodes = 
@()) + + $normalized = ConvertTo-StringArray -Value $ReasonCodes + if (@($normalized).Count -eq 0) { + return 'none' + } + + return [string]::Join(',', @($normalized)) +} + +function Test-ContainsAnyReasonCode { + param( + [Parameter()][string[]]$Source = @(), + [Parameter()][string[]]$Candidates = @() + ) + + $normalizedSource = ConvertTo-StringArray -Value $Source + foreach ($reason in @($normalizedSource)) { + if (@($Candidates) -contains [string]$reason) { + return $true + } + } + + return $false +} + +function Invoke-BranchPolicyAssessment { + param( + [Parameter(Mandatory = $true)][string]$ScriptPath, + [Parameter(Mandatory = $true)][string]$RepositorySlug, + [Parameter(Mandatory = $true)][string]$ReportPath + ) + + $runtimeError = '' + $exitCode = 1 + try { + & pwsh -NoProfile -File $ScriptPath ` + -Repository $RepositorySlug ` + -OutputPath $ReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $runtimeError = [string]$_.Exception.Message + $exitCode = 1 + } + + $report = $null + if (Test-Path -LiteralPath $ReportPath -PathType Leaf) { + $report = Get-Content -LiteralPath $ReportPath -Raw | ConvertFrom-Json -Depth 100 -ErrorAction Stop + } + + if ($null -eq $report) { + $report = [pscustomobject]@{ + status = 'fail' + reason_codes = @('branch_policy_report_missing') + message = if ([string]::IsNullOrWhiteSpace($runtimeError)) { "branch_policy_report_missing: $ReportPath" } else { $runtimeError } + } + } elseif (-not [string]::IsNullOrWhiteSpace($runtimeError)) { + $report.status = 'fail' + $report.reason_codes = @('branch_policy_runtime_error') + $report.message = $runtimeError + } + + return [pscustomobject]@{ + exit_code = $exitCode + report = $report + } +} + +function Invoke-RaceGateAssessment { + param( + [Parameter(Mandatory = $true)][string]$ScriptPath, + [Parameter(Mandatory = $true)][string]$RepositorySlug, + [Parameter(Mandatory = $true)][string]$SourceBranch, + [Parameter(Mandatory = 
$true)][int]$MaxAgeHours, + [Parameter(Mandatory = $true)][string]$ReportPath + ) + + $runtimeError = '' + $exitCode = 1 + try { + & pwsh -NoProfile -File $ScriptPath ` + -Repository $RepositorySlug ` + -SourceBranch $SourceBranch ` + -MaxAgeHours $MaxAgeHours ` + -OutputPath $ReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $runtimeError = [string]$_.Exception.Message + $exitCode = 1 + } + + $report = $null + if (Test-Path -LiteralPath $ReportPath -PathType Leaf) { + $report = Get-Content -LiteralPath $ReportPath -Raw | ConvertFrom-Json -Depth 100 -ErrorAction Stop + } + + if ($null -eq $report) { + $report = [pscustomobject]@{ + status = 'fail' + reason_codes = @('race_gate_report_missing') + message = if ([string]::IsNullOrWhiteSpace($runtimeError)) { "race_gate_report_missing: $ReportPath" } else { $runtimeError } + } + } elseif (-not [string]::IsNullOrWhiteSpace($runtimeError)) { + $report.status = 'fail' + $report.reason_codes = @('race_gate_runtime_error') + $report.message = $runtimeError + } + + return [pscustomobject]@{ + exit_code = $exitCode + report = $report + } +} + +function New-AssessmentSummary { + param([Parameter(Mandatory = $true)]$Assessment) + + $assessmentReport = $Assessment.report + return [ordered]@{ + status = [string](Get-PropertyValueOrDefault -Object $assessmentReport -Name 'status' -DefaultValue 'fail') + reason_codes = @( + ConvertTo-StringArray -Value (Get-PropertyValueOrDefault -Object $assessmentReport -Name 'reason_codes' -DefaultValue @()) + ) + message = [string](Get-PropertyValueOrDefault -Object $assessmentReport -Name 'message' -DefaultValue '') + exit_code = [int]$Assessment.exit_code + } +} + +function Test-IsAssessmentPass { + param([Parameter(Mandatory = $true)]$Assessment) + + return ([string](Get-PropertyValueOrDefault -Object $Assessment.report -Name 'status' -DefaultValue 'fail') -eq 'pass') +} + +$scratchRoot = Join-Path 
([System.IO.Path]::GetTempPath()) ("release-guardrails-self-heal-" + [Guid]::NewGuid().ToString('N')) +New-Item -Path $scratchRoot -ItemType Directory -Force | Out-Null + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + branch = $Branch + drill_workflow = $DrillWorkflow + race_gate_max_age_hours = $RaceGateMaxAgeHours + auto_self_heal = [bool]$AutoSelfHeal + max_attempts = $MaxAttempts + drill_watch_timeout_minutes = $DrillWatchTimeoutMinutes + status = 'fail' + reason_code = '' + message = '' + initial_assessment = $null + remediation_attempts = @() + final_assessment = $null +} + +try { + $initialBranchPath = Join-Path $scratchRoot 'initial-branch-policy.json' + $initialRacePath = Join-Path $scratchRoot 'initial-race-gate.json' + $currentBranchAssessment = Invoke-BranchPolicyAssessment -ScriptPath $branchPolicyScript -RepositorySlug $Repository -ReportPath $initialBranchPath + $currentRaceAssessment = Invoke-RaceGateAssessment -ScriptPath $raceGateScript -RepositorySlug $Repository -SourceBranch $Branch -MaxAgeHours $RaceGateMaxAgeHours -ReportPath $initialRacePath + + $report.initial_assessment = [ordered]@{ + branch_protection = New-AssessmentSummary -Assessment $currentBranchAssessment + release_race_gate = New-AssessmentSummary -Assessment $currentRaceAssessment + } + $report.final_assessment = $report.initial_assessment + + $branchPass = Test-IsAssessmentPass -Assessment $currentBranchAssessment + $racePass = Test-IsAssessmentPass -Assessment $currentRaceAssessment + + if ($branchPass -and $racePass) { + $report.status = 'pass' + $report.reason_code = 'already_healthy' + $report.message = 'Release guardrails are already passing. No remediation required.' + } elseif (-not $AutoSelfHeal) { + $report.status = 'fail' + $report.reason_code = 'auto_remediation_disabled' + $report.message = 'Release guardrails failed and auto-remediation is disabled.' 
+ } else { + $branchAutomatableReasons = @( + 'main_rule_missing', + 'main_rule_mismatch', + 'integration_rule_missing', + 'integration_rule_mismatch' + ) + $raceAutomatableReasons = @( + 'drill_run_missing', + 'drill_run_stale', + 'drill_report_missing', + 'drill_report_download_failed' + ) + + $attemptRecords = [System.Collections.Generic.List[object]]::new() + $executionFailureCount = 0 + $noAutomatableAction = $false + $recovered = $false + $normalizedMaxAttempts = [Math]::Max(1, [Math]::Min($MaxAttempts, 5)) + + for ($attempt = 1; $attempt -le $normalizedMaxAttempts; $attempt++) { + $attemptRecord = [ordered]@{ + attempt = $attempt + status = 'pending' + pre_assessment = [ordered]@{ + branch_protection = New-AssessmentSummary -Assessment $currentBranchAssessment + release_race_gate = New-AssessmentSummary -Assessment $currentRaceAssessment + } + actions = @() + error = '' + post_assessment = $null + } + + $actions = [System.Collections.Generic.List[object]]::new() + $attemptHasAutomatableAction = $false + $attemptExecutionError = '' + + $preBranchReasonCodes = Get-ReasonCodesFromReport -Report $currentBranchAssessment.report + $preRaceReasonCodes = Get-ReasonCodesFromReport -Report $currentRaceAssessment.report + $branchRequiresRemediation = (-not (Test-IsAssessmentPass -Assessment $currentBranchAssessment)) + $raceRequiresRemediation = (-not (Test-IsAssessmentPass -Assessment $currentRaceAssessment)) + + try { + if ($branchRequiresRemediation) { + if (Test-ContainsAnyReasonCode -Source @($preBranchReasonCodes) -Candidates @($branchAutomatableReasons)) { + $applyPath = Join-Path $scratchRoot ("attempt-{0}-branch-apply.json" -f $attempt) + & pwsh -NoProfile -File $setBranchPolicyScript ` + -Repository $Repository ` + -OutputPath $applyPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "branch_protection_apply_failed: attempt=$attempt exit_code=$LASTEXITCODE" + } + + $applyReport = Get-Content -LiteralPath $applyPath -Raw | ConvertFrom-Json -Depth 100 
-ErrorAction Stop + [void]$actions.Add([ordered]@{ + action = 'apply_branch_protection_policy' + status = [string](Get-PropertyValueOrDefault -Object $applyReport -Name 'status' -DefaultValue 'unknown') + reason_codes = @( + ConvertTo-StringArray -Value (Get-PropertyValueOrDefault -Object $applyReport -Name 'reason_codes' -DefaultValue @()) + ) + message = [string](Get-PropertyValueOrDefault -Object $applyReport -Name 'message' -DefaultValue '') + }) + $attemptHasAutomatableAction = $true + } else { + [void]$actions.Add([ordered]@{ + action = 'apply_branch_protection_policy' + status = 'skipped' + reason_codes = @('no_automatable_reason_code') + message = "Branch protection check failed with non-automatable reason codes: $(Format-ReasonCodeSet -ReasonCodes $preBranchReasonCodes)" + }) + } + } + + if ($raceRequiresRemediation) { + if (Test-ContainsAnyReasonCode -Source @($preRaceReasonCodes) -Candidates @($raceAutomatableReasons)) { + $dispatchPath = Join-Path $scratchRoot ("attempt-{0}-race-drill-dispatch.json" -f $attempt) + $dispatchInputs = @( + 'auto_remediate=true', + 'keep_latest_canary_n=1', + "watch_timeout_minutes=$DrillWatchTimeoutMinutes" + ) + & pwsh -NoProfile -File $dispatchWorkflowScript ` + -Repository $Repository ` + -WorkflowFile $DrillWorkflow ` + -Branch $Branch ` + -Inputs $dispatchInputs ` + -OutputPath $dispatchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "race_drill_dispatch_failed: attempt=$attempt exit_code=$LASTEXITCODE" + } + $dispatchReport = Get-Content -LiteralPath $dispatchPath -Raw | ConvertFrom-Json -Depth 100 -ErrorAction Stop + [void]$actions.Add([ordered]@{ + action = 'dispatch_release_race_hardening_drill' + status = 'success' + run_id = [string](Get-PropertyValueOrDefault -Object $dispatchReport -Name 'run_id' -DefaultValue '') + run_url = [string](Get-PropertyValueOrDefault -Object $dispatchReport -Name 'url' -DefaultValue '') + }) + + $watchPath = Join-Path $scratchRoot ("attempt-{0}-race-drill-watch.json" -f 
$attempt) + & pwsh -NoProfile -File $watchWorkflowScript ` + -Repository $Repository ` + -RunId ([string](Get-PropertyValueOrDefault -Object $dispatchReport -Name 'run_id' -DefaultValue '')) ` + -TimeoutMinutes $DrillWatchTimeoutMinutes ` + -OutputPath $watchPath | Out-Null + if ($LASTEXITCODE -ne 0) { + throw "race_drill_watch_failed: attempt=$attempt run_id=$([string](Get-PropertyValueOrDefault -Object $dispatchReport -Name 'run_id' -DefaultValue '')) exit_code=$LASTEXITCODE" + } + + $watchReport = Get-Content -LiteralPath $watchPath -Raw | ConvertFrom-Json -Depth 100 -ErrorAction Stop + [void]$actions.Add([ordered]@{ + action = 'watch_release_race_hardening_drill' + status = [string](Get-PropertyValueOrDefault -Object $watchReport -Name 'conclusion' -DefaultValue 'unknown') + run_id = [string](Get-PropertyValueOrDefault -Object $watchReport -Name 'run_id' -DefaultValue '') + run_url = [string](Get-PropertyValueOrDefault -Object $watchReport -Name 'url' -DefaultValue '') + classified_reason = [string](Get-PropertyValueOrDefault -Object $watchReport -Name 'classified_reason' -DefaultValue '') + }) + + $attemptHasAutomatableAction = $true + } else { + [void]$actions.Add([ordered]@{ + action = 'dispatch_release_race_hardening_drill' + status = 'skipped' + reason_codes = @('no_automatable_reason_code') + message = "Race-hardening gate failed with non-automatable reason codes: $(Format-ReasonCodeSet -ReasonCodes $preRaceReasonCodes)" + }) + } + } + } catch { + $executionFailureCount++ + $attemptExecutionError = [string]$_.Exception.Message + } + + $attemptRecord.actions = @($actions) + + if (-not [string]::IsNullOrWhiteSpace($attemptExecutionError)) { + $attemptRecord.status = 'remediation_execution_failed' + $attemptRecord.error = $attemptExecutionError + [void]$attemptRecords.Add($attemptRecord) + continue + } + + if (-not $attemptHasAutomatableAction) { + $attemptRecord.status = 'no_automatable_action' + $attemptRecord.error = 'No automatable guardrail remediation 
path for current reason codes.' + [void]$attemptRecords.Add($attemptRecord) + $noAutomatableAction = $true + break + } + + $verifyBranchPath = Join-Path $scratchRoot ("attempt-{0}-verify-branch-policy.json" -f $attempt) + $verifyRacePath = Join-Path $scratchRoot ("attempt-{0}-verify-race-gate.json" -f $attempt) + $currentBranchAssessment = Invoke-BranchPolicyAssessment -ScriptPath $branchPolicyScript -RepositorySlug $Repository -ReportPath $verifyBranchPath + $currentRaceAssessment = Invoke-RaceGateAssessment -ScriptPath $raceGateScript -RepositorySlug $Repository -SourceBranch $Branch -MaxAgeHours $RaceGateMaxAgeHours -ReportPath $verifyRacePath + + $attemptRecord.post_assessment = [ordered]@{ + branch_protection = New-AssessmentSummary -Assessment $currentBranchAssessment + release_race_gate = New-AssessmentSummary -Assessment $currentRaceAssessment + } + + if ((Test-IsAssessmentPass -Assessment $currentBranchAssessment) -and (Test-IsAssessmentPass -Assessment $currentRaceAssessment)) { + $attemptRecord.status = 'recovered' + [void]$attemptRecords.Add($attemptRecord) + $recovered = $true + break + } + + $attemptRecord.status = 'verify_failed' + [void]$attemptRecords.Add($attemptRecord) + } + + $report.remediation_attempts = @($attemptRecords) + $report.final_assessment = [ordered]@{ + branch_protection = New-AssessmentSummary -Assessment $currentBranchAssessment + release_race_gate = New-AssessmentSummary -Assessment $currentRaceAssessment + } + + if ($recovered) { + $report.status = 'pass' + $report.reason_code = 'remediated' + $report.message = 'Release guardrails auto-remediation completed and verification passed.' + } elseif ($noAutomatableAction) { + $report.status = 'fail' + $report.reason_code = 'no_automatable_action' + $finalBranchReasons = Get-ReasonCodesFromReport -Report $currentBranchAssessment.report + $finalRaceReasons = Get-ReasonCodesFromReport -Report $currentRaceAssessment.report + $report.message = "No automatable remediation path. 
branch_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalBranchReasons) race_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalRaceReasons)" + } elseif ($executionFailureCount -gt 0) { + $report.status = 'fail' + $report.reason_code = 'remediation_execution_failed' + $report.message = 'One or more remediation execution steps failed before verification could pass.' + } else { + $report.status = 'fail' + $report.reason_code = 'remediation_verify_failed' + $finalBranchReasons = Get-ReasonCodesFromReport -Report $currentBranchAssessment.report + $finalRaceReasons = Get-ReasonCodesFromReport -Report $currentRaceAssessment.report + $report.message = "Guardrails remain failing after bounded remediation. branch_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalBranchReasons) race_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalRaceReasons)" + } + } +} +catch { + $report.status = 'fail' + $report.reason_code = 'guardrails_self_heal_runtime_error' + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null + if (Test-Path -LiteralPath $scratchRoot -PathType Container) { + Remove-Item -LiteralPath $scratchRoot -Recurse -Force -ErrorAction SilentlyContinue + } +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index 491e135..296be2e 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -177,6 +177,7 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 
'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_title_release_guardrails' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) @@ -194,6 +195,11 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_workflow' -Passed 
([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes -eq 45) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_slo_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_guardrails_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.remediation_workflow -eq 'release-guardrails-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.remediation_workflow) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_guardrails_race_drill_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow -eq 'release-race-hardening-drill.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_guardrails_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_guardrails_verify' -Passed 
([bool]$releaseClient.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_guardrails_race_gate_max_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours -eq 168) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow -eq 'release-workspace-installer.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_branch' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch -eq 'main') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_self_healing_rollback_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index d6119c8..2fab11d 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -101,6 +101,7 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_incident_auto_close_on_recovery' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail 
([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Name 'ops_policy_incident_reopen_on_regression' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) Add-Check -Name 'ops_policy_incident_title_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Control Plane Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Name 'ops_policy_incident_title_release_guardrails' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) Add-Check -Name 'ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) Add-Check -Name 'ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Name 'ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) @@ -118,6 +119,11 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_self_healing_slo_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow -eq 'ops-autoremediate.yml') 
-Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow) Add-Check -Name 'ops_policy_self_healing_slo_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes -eq 45) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes) Add-Check -Name 'ops_policy_self_healing_slo_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation) + Add-Check -Name 'ops_policy_self_healing_guardrails_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.remediation_workflow -eq 'release-guardrails-autoremediate.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.remediation_workflow) + Add-Check -Name 'ops_policy_self_healing_guardrails_race_drill_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow -eq 'release-race-hardening-drill.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow) + Add-Check -Name 'ops_policy_self_healing_guardrails_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes) + Add-Check -Name 'ops_policy_self_healing_guardrails_verify' -Passed ([bool]$releaseClient.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation) + Add-Check -Name 'ops_policy_self_healing_guardrails_race_gate_max_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours 
-eq 168) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours) Add-Check -Name 'ops_policy_self_healing_rollback_workflow' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow -eq 'release-workspace-installer.yml') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow) Add-Check -Name 'ops_policy_self_healing_rollback_branch' -Passed ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch -eq 'main') -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch) Add-Check -Name 'ops_policy_self_healing_rollback_watch_timeout' -Passed ([int]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes -eq 120) -Detail ([string]$releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index bbed36d..d99ab94 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -99,6 +99,15 @@ try { }) | Out-Null if (-not $selfHealingPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_self_healing_missing' + } else { + $guardrailsPolicyPresent = ($null -ne $releaseClient.ops_control_plane_policy.self_healing.guardrails) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_self_healing_guardrails_present' + passed = $guardrailsPolicyPresent + }) | Out-Null + if (-not $guardrailsPolicyPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_guardrails_missing' + } } $stableWindowPresent = ($null -ne $releaseClient.ops_control_plane_policy.stable_promotion_window) diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 
b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index b542ed7..664d1cd 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -40,6 +40,7 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'runtime_images_missing' $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' $script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_guardrails_missing' $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_missing' $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_reason_pattern_missing' $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_reason_example_missing' diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index ab2f0e5..30a1045 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -73,12 +73,18 @@ Describe 'Release client policy contract' { ([string]$releaseClient.ops_control_plane_policy.stable_promotion_window.override_reason_example) | Should -Match '^CHG-' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops SLO Gate Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' + @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Guardrails Auto-Remediation Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' $releaseClient.ops_control_plane_policy.self_healing.enabled | Should -BeTrue $releaseClient.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 $releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' 
$releaseClient.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes | Should -Be 45 $releaseClient.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation | Should -BeTrue + $releaseClient.ops_control_plane_policy.self_healing.guardrails.remediation_workflow | Should -Be 'release-guardrails-autoremediate.yml' + $releaseClient.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow | Should -Be 'release-race-hardening-drill.yml' + $releaseClient.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes | Should -Be 120 + $releaseClient.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation | Should -BeTrue + $releaseClient.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours | Should -Be 168 $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_workflow | Should -Be 'release-workspace-installer.yml' $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.release_branch | Should -Be 'main' $releaseClient.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes | Should -Be 120 @@ -109,6 +115,7 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_pattern_exists' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_example' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_enabled' + $script:policyScriptContent | Should -Match 'ops_policy_self_healing_guardrails_workflow' $script:policyScriptContent | Should -Match 'ops_policy_self_healing_rollback_workflow' $script:policyScriptContent | Should -Match 'ops_policy_rollback_release_limit' } diff --git a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..939ec05 --- /dev/null +++ b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 @@ 
-0,0 +1,68 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release guardrails auto-remediation workflow contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/release-guardrails-autoremediate.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Release guardrails contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with deterministic inputs' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'race_gate_max_age_hours' + $script:workflowContent | Should -Match 'auto_self_heal' + $script:workflowContent | Should -Match 'max_attempts' + $script:workflowContent | Should -Match 'drill_watch_timeout_minutes' + $script:workflowContent | Should -Match 'actions:\s*write' + $script:workflowContent | Should -Match 'issues:\s*write' + } + + It 'executes guardrail runtime and incident lifecycle management' { + $script:workflowContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' + $script:workflowContent | Should -Match 'release-guardrails-autoremediate-report\.json' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match 'Release Guardrails Auto-Remediation Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + } + + It 'enforces autonomous remediation paths for branch protection and race 
gate freshness' { + $script:runtimeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' + $script:runtimeContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' + $script:runtimeContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' + $script:runtimeContent | Should -Match 'Dispatch-WorkflowAtRemoteHead\.ps1' + $script:runtimeContent | Should -Match 'Watch-WorkflowRun\.ps1' + $script:runtimeContent | Should -Match 'drill_run_missing' + $script:runtimeContent | Should -Match 'drill_run_stale' + $script:runtimeContent | Should -Match 'apply_branch_protection_policy' + $script:runtimeContent | Should -Match 'dispatch_release_race_hardening_drill' + } + + It 'keeps deterministic self-healing reason codes explicit' { + foreach ($reasonCode in @( + 'already_healthy', + 'remediated', + 'auto_remediation_disabled', + 'no_automatable_action', + 'remediation_execution_failed', + 'remediation_verify_failed', + 'guardrails_self_heal_runtime_error' + )) { + $pattern = [regex]::Escape($reasonCode) + $script:runtimeContent | Should -Match $pattern + } + } +} diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index 4b45b95..c0f9a4e 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -29,11 +29,13 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:runbookContent | Should -Match 'ops-slo-gate\.yml' $script:runbookContent | Should -Match 'ops-policy-drift-check\.yml' + $script:runbookContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:runbookContent | Should -Match 'release-rollback-drill\.yml' $script:runbookContent | Should -Match 'release-race-hardening-drill\.yml' $script:runbookContent | Should -Match 'release-race-hardening-gate\.yml' $script:runbookContent | Should -Match 'branch-protection-drift-check\.yml' $script:runbookContent | Should -Match 
'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:runbookContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:runbookContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' $script:runbookContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' $script:runbookContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' @@ -44,6 +46,7 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'drill_passed' $script:runbookContent | Should -Match 'Release Race Hardening Drill Alert' $script:runbookContent | Should -Match 'Branch Protection Drift Alert' + $script:runbookContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:runbookContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'stable_window_override_invalid' @@ -60,15 +63,18 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'canary-smoke-tag-hygiene\.yml' $script:readmeContent | Should -Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' + $script:readmeContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-gate\.yml' $script:readmeContent | Should -Match 'branch-protection-drift-check\.yml' $script:readmeContent | Should -Match 'Release Race Hardening Drill' + $script:readmeContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 
'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:readmeContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:readmeContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' $script:readmeContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' $script:readmeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' @@ -80,16 +86,19 @@ Describe 'Scope A ops runbook contract' { $script:agentsContent | Should -Match 'canary-smoke-tag-hygiene\.yml' $script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' + $script:agentsContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-gate\.yml' $script:agentsContent | Should -Match 'branch-protection-drift-check\.yml' $script:agentsContent | Should -Match 'Release Race Hardening Drill' $script:agentsContent | Should -Match 'Branch Protection Drift Alert' + $script:agentsContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:agentsContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:agentsContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' $script:agentsContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' $script:agentsContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index e5be053..1a53fe2 100644 --- 
a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -38,6 +38,7 @@ Describe 'Workspace surface contract' { $script:rollbackSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' $script:raceHardeningDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRaceHardeningDrill.ps1' $script:raceHardeningGateScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseRaceHardeningGate.ps1' + $script:releaseGuardrailsSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1' $script:releaseBranchProtectionPolicyScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseBranchProtectionPolicy.ps1' $script:setReleaseBranchProtectionPolicyScriptPath = Join-Path $script:repoRoot 'scripts/Set-ReleaseBranchProtectionPolicy.ps1' $script:dockerLinuxIterationScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-DockerDesktopLinuxIteration.ps1' @@ -57,6 +58,7 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' $script:opsSloGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' $script:opsPolicyDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' + $script:releaseGuardrailsAutoRemediationWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-guardrails-autoremediate.yml' $script:branchProtectionDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/branch-protection-drift-check.yml' $script:rollbackDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' $script:raceHardeningDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-race-hardening-drill.yml' @@ -112,6 +114,7 @@ Describe 'Workspace surface contract' { $script:rollbackSelfHealingScriptPath, $script:raceHardeningDrillScriptPath, 
$script:raceHardeningGateScriptPath, + $script:releaseGuardrailsSelfHealingScriptPath, $script:releaseBranchProtectionPolicyScriptPath, $script:setReleaseBranchProtectionPolicyScriptPath, $script:dockerLinuxIterationScriptPath, @@ -131,6 +134,7 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath, $script:opsSloGateWorkflowPath, $script:opsPolicyDriftWorkflowPath, + $script:releaseGuardrailsAutoRemediationWorkflowPath, $script:branchProtectionDriftWorkflowPath, $script:rollbackDrillWorkflowPath, $script:raceHardeningDrillWorkflowPath, @@ -306,12 +310,18 @@ Describe 'Workspace surface contract' { ([string]$script:manifest.installer_contract.release_client.ops_control_plane_policy.stable_promotion_window.override_reason_example) | Should -Match '^CHG-' (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops SLO Gate Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.enabled | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.watch_timeout_minutes | Should -Be 45 
$script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.verify_after_remediation | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails.remediation_workflow | Should -Be 'release-guardrails-autoremediate.yml' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails.race_drill_workflow | Should -Be 'release-race-hardening-drill.yml' + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails.watch_timeout_minutes | Should -Be 120 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails.verify_after_remediation | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails.race_gate_max_age_hours | Should -Be 168 $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.release_workflow | Should -Be 'release-workspace-installer.yml' $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.release_branch | Should -Be 'main' $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.rollback_drill.watch_timeout_minutes | Should -Be 120 @@ -405,16 +415,19 @@ Describe 'Workspace surface contract' { $script:agentsContent | Should -Match 'ops_control_plane_policy' $script:agentsContent | Should -Match 'ops-slo-gate\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' + $script:agentsContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-drill\.yml' $script:agentsContent | Should -Match 'release-race-hardening-gate\.yml' $script:agentsContent | Should -Match 'branch-protection-drift-check\.yml' 
$script:agentsContent | Should -Match 'Release Race Hardening Drill Alert' $script:agentsContent | Should -Match 'Branch Protection Drift Alert' + $script:agentsContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:agentsContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:agentsContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:agentsContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:agentsContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:agentsContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' $script:agentsContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' $script:agentsContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' @@ -452,15 +465,18 @@ Describe 'Workspace surface contract' { $script:readmeContent | Should -Match 'runtime_images' $script:readmeContent | Should -Match 'ops-slo-gate\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' + $script:readmeContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-drill\.yml' $script:readmeContent | Should -Match 'release-race-hardening-gate\.yml' $script:readmeContent | Should -Match 'branch-protection-drift-check\.yml' $script:readmeContent | Should -Match 'Release Race Hardening Drill' + $script:readmeContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:readmeContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:readmeContent | Should -Match 'Invoke-OpsSloSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-RollbackDrillSelfHealing\.ps1' $script:readmeContent | Should -Match 'Invoke-ReleaseRaceHardeningDrill\.ps1' + $script:readmeContent | Should -Match 
'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:readmeContent | Should -Match 'Test-ReleaseRaceHardeningGate\.ps1' $script:readmeContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1' $script:readmeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' @@ -511,6 +527,7 @@ Describe 'Workspace surface contract' { $script:ciWorkflowContent | Should -Match 'OpsIncidentLifecycleContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'OpsSloGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'OpsPolicyDriftWorkflowContract\.Tests\.ps1' + $script:ciWorkflowContent | Should -Match 'ReleaseGuardrailsAutoRemediationWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'ReleaseRaceHardeningDrillWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'ReleaseRaceHardeningGateWorkflowContract\.Tests\.ps1' $script:ciWorkflowContent | Should -Match 'BranchProtectionDriftWorkflowContract\.Tests\.ps1' diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 0832bd1..66b2ca1 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -273,6 +273,7 @@ "Release Control Plane Alert", "Ops SLO Gate Alert", "Ops Policy Drift Alert", + "Release Guardrails Auto-Remediation Alert", "Release Rollback Drill Alert" ] }, @@ -299,6 +300,13 @@ "watch_timeout_minutes": 45, "verify_after_remediation": true }, + "guardrails": { + "remediation_workflow": "release-guardrails-autoremediate.yml", + "race_drill_workflow": "release-race-hardening-drill.yml", + "watch_timeout_minutes": 120, + "verify_after_remediation": true, + "race_gate_max_age_hours": 168 + }, "rollback_drill": { "release_workflow": "release-workspace-installer.yml", "release_branch": "main", diff --git 
a/workspace-governance.json b/workspace-governance.json index 0832bd1..66b2ca1 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -273,6 +273,7 @@ "Release Control Plane Alert", "Ops SLO Gate Alert", "Ops Policy Drift Alert", + "Release Guardrails Auto-Remediation Alert", "Release Rollback Drill Alert" ] }, @@ -299,6 +300,13 @@ "watch_timeout_minutes": 45, "verify_after_remediation": true }, + "guardrails": { + "remediation_workflow": "release-guardrails-autoremediate.yml", + "race_drill_workflow": "release-race-hardening-drill.yml", + "watch_timeout_minutes": 120, + "verify_after_remediation": true, + "race_gate_max_age_hours": 168 + }, "rollback_drill": { "release_workflow": "release-workspace-installer.yml", "release_branch": "main", From 412d82c1a4c7d5650d67893853ae003a49f9eb27 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:32:58 -0800 Subject: [PATCH 49/60] Use WORKFLOW_BOT_TOKEN fallback for guardrails workflows --- .github/workflows/branch-protection-drift-check.yml | 6 +++--- .github/workflows/release-guardrails-autoremediate.yml | 6 +++--- AGENTS.md | 1 + README.md | 1 + docs/runbooks/release-ops-incident-response.md | 4 ++++ tests/BranchProtectionDriftWorkflowContract.Tests.ps1 | 1 + ...leaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 | 1 + 7 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/branch-protection-drift-check.yml b/.github/workflows/branch-protection-drift-check.yml index 3d9856e..2f90d0a 100644 --- a/.github/workflows/branch-protection-drift-check.yml +++ b/.github/workflows/branch-protection-drift-check.yml @@ -24,7 +24,7 @@ jobs: - name: Verify release branch-protection policy shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} run: | $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'branch-protection-drift-report.json' @@ -44,7 +44,7 @@ 
jobs: if: failure() shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | @@ -78,7 +78,7 @@ jobs: if: success() shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | diff --git a/.github/workflows/release-guardrails-autoremediate.yml b/.github/workflows/release-guardrails-autoremediate.yml index 760d07c..bb1ad96 100644 --- a/.github/workflows/release-guardrails-autoremediate.yml +++ b/.github/workflows/release-guardrails-autoremediate.yml @@ -42,7 +42,7 @@ jobs: - name: Execute release guardrails auto-remediation shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} run: | $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' @@ -104,7 +104,7 @@ jobs: if: failure() shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | @@ -137,7 +137,7 @@ jobs: if: success() shell: pwsh env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | diff --git a/AGENTS.md b/AGENTS.md index d7b50a8..45a1339 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -313,6 +313,7 @@ Build 
and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Race-hardening gate must fail when latest successful drill evidence is missing/stale, `reason_code != drill_passed`, or collision evidence is absent. - `.github/workflows/branch-protection-drift-check.yml` must run `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and maintain incident lifecycle title `Branch Protection Drift Alert`. - `.github/workflows/release-guardrails-autoremediate.yml` must run `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` and maintain incident lifecycle title `Release Guardrails Auto-Remediation Alert`. +- Branch-protection query/apply workflows must use `WORKFLOW_BOT_TOKEN` when available, with deterministic fallback to `github.token`. - `scripts/Set-ReleaseBranchProtectionPolicy.ps1` is the deterministic apply path for required-check drift repair. - Guardrails self-healing policy must remain explicit under `ops_control_plane_policy.self_healing.guardrails`: - `remediation_workflow` diff --git a/README.md b/README.md index d958577..ef09c30 100644 --- a/README.md +++ b/README.md @@ -507,6 +507,7 @@ It runs `scripts/Test-ReleaseRaceHardeningGate.ps1` and fails when: - `integration/*` Use `scripts/Set-ReleaseBranchProtectionPolicy.ps1` to deterministically apply/repair required check contracts. +Branch-protection workflows prefer `WORKFLOW_BOT_TOKEN` when available and deterministically fall back to `github.token`. `release-guardrails-autoremediate.yml` is scheduled hourly and supports manual dispatch. 
It runs `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` to: - evaluate branch-protection drift and release race-hardening freshness in one pass diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index 2bc7c3a..d9259ef 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -319,6 +319,10 @@ Continuous drift monitor: gh workflow run branch-protection-drift-check.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ``` +Token policy for branch-protection workflows: +- prefer repository secret `WORKFLOW_BOT_TOKEN` +- deterministic fallback to `github.token` when the secret is unavailable + Local policy verify: ```powershell diff --git a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 index cd146ea..2fab0f8 100644 --- a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 +++ b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 @@ -32,6 +32,7 @@ Describe 'Branch protection drift workflow contract' { $script:workflowContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:workflowContent | Should -Match 'branch-protection-drift-report\.json' $script:workflowContent | Should -Match 'Branch Protection Drift Check' + $script:workflowContent | Should -Match 'WORKFLOW_BOT_TOKEN' } It 'manages failure and recovery incidents for branch-protection drift' { diff --git a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 index 939ec05..ee422f1 100644 --- a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 @@ -35,6 +35,7 @@ Describe 'Release guardrails auto-remediation workflow contract' { $script:workflowContent | Should -Match 'release-guardrails-autoremediate-report\.json' $script:workflowContent | Should 
-Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'Release Guardrails Auto-Remediation Alert' + $script:workflowContent | Should -Match 'WORKFLOW_BOT_TOKEN' $script:workflowContent | Should -Match '-Mode Fail' $script:workflowContent | Should -Match '-Mode Recover' } From bf13b5fba51062e14ced2ce35fbed6eb0c27845c Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:44:21 -0800 Subject: [PATCH 50/60] Harden autonomous guardrails token and auth reason mapping --- .../branch-protection-drift-check.yml | 16 ++++- .../release-guardrails-autoremediate.yml | 16 ++++- AGENTS.md | 7 ++- README.md | 7 ++- .../runbooks/release-ops-incident-response.md | 12 +++- .../Invoke-ReleaseGuardrailsSelfHealing.ps1 | 41 +++++++++++++ .../Test-ReleaseBranchProtectionPolicy.ps1 | 58 ++++++++++++++++++- ...hProtectionDriftWorkflowContract.Tests.ps1 | 7 +++ ...sAutoRemediationWorkflowContract.Tests.ps1 | 7 +++ 9 files changed, 160 insertions(+), 11 deletions(-) diff --git a/.github/workflows/branch-protection-drift-check.yml b/.github/workflows/branch-protection-drift-check.yml index 2f90d0a..5cd0cd9 100644 --- a/.github/workflows/branch-protection-drift-check.yml +++ b/.github/workflows/branch-protection-drift-check.yml @@ -21,10 +21,20 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Validate workflow bot token + shell: pwsh + env: + WORKFLOW_BOT_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} + run: | + $ErrorActionPreference = 'Stop' + if ([string]::IsNullOrWhiteSpace($env:WORKFLOW_BOT_TOKEN)) { + throw "workflow_bot_token_missing: Required secret WORKFLOW_BOT_TOKEN is not configured. Add a token with repository administration read/write access." 
+ } + - name: Verify release branch-protection policy shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} run: | $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'branch-protection-drift-report.json' @@ -44,7 +54,7 @@ jobs: if: failure() shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | @@ -78,7 +88,7 @@ jobs: if: success() shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | diff --git a/.github/workflows/release-guardrails-autoremediate.yml b/.github/workflows/release-guardrails-autoremediate.yml index bb1ad96..5635d37 100644 --- a/.github/workflows/release-guardrails-autoremediate.yml +++ b/.github/workflows/release-guardrails-autoremediate.yml @@ -39,10 +39,20 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Validate workflow bot token + shell: pwsh + env: + WORKFLOW_BOT_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} + run: | + $ErrorActionPreference = 'Stop' + if ([string]::IsNullOrWhiteSpace($env:WORKFLOW_BOT_TOKEN)) { + throw "workflow_bot_token_missing: Required secret WORKFLOW_BOT_TOKEN is not configured. Add a token with repository administration read/write and actions write scopes." 
+ } + - name: Execute release guardrails auto-remediation shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} run: | $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' @@ -104,7 +114,7 @@ jobs: if: failure() shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | @@ -137,7 +147,7 @@ jobs: if: success() shell: pwsh env: - GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN != '' && secrets.WORKFLOW_BOT_TOKEN || github.token }} + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} REPOSITORY: ${{ github.repository }} RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} run: | diff --git a/AGENTS.md b/AGENTS.md index 45a1339..01be9fa 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -313,8 +313,12 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Race-hardening gate must fail when latest successful drill evidence is missing/stale, `reason_code != drill_passed`, or collision evidence is absent. - `.github/workflows/branch-protection-drift-check.yml` must run `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and maintain incident lifecycle title `Branch Protection Drift Alert`. - `.github/workflows/release-guardrails-autoremediate.yml` must run `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` and maintain incident lifecycle title `Release Guardrails Auto-Remediation Alert`. -- Branch-protection query/apply workflows must use `WORKFLOW_BOT_TOKEN` when available, with deterministic fallback to `github.token`. 
+- Branch-protection query/apply workflows must require repository secret `WORKFLOW_BOT_TOKEN` and fail fast with deterministic `workflow_bot_token_missing` when the secret is not configured. - `scripts/Set-ReleaseBranchProtectionPolicy.ps1` is the deterministic apply path for required-check drift repair. +- Branch-protection query classifier reason codes must remain explicit: + - `branch_protection_query_failed` + - `branch_protection_authentication_missing` + - `branch_protection_authz_denied` - Guardrails self-healing policy must remain explicit under `ops_control_plane_policy.self_healing.guardrails`: - `remediation_workflow` - `race_drill_workflow` @@ -329,6 +333,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `remediation_execution_failed` - `remediation_verify_failed` - `guardrails_self_heal_runtime_error` +- Guardrails report must include `remediation_hints` when status is fail and auto-remediation cannot fully recover. - Race-hardening drill reason codes must remain explicit: - `drill_passed` - `control_plane_collision_not_observed` diff --git a/README.md b/README.md index ef09c30..637d167 100644 --- a/README.md +++ b/README.md @@ -507,7 +507,11 @@ It runs `scripts/Test-ReleaseRaceHardeningGate.ps1` and fails when: - `integration/*` Use `scripts/Set-ReleaseBranchProtectionPolicy.ps1` to deterministically apply/repair required check contracts. -Branch-protection workflows prefer `WORKFLOW_BOT_TOKEN` when available and deterministically fall back to `github.token`. +Branch-protection workflows require repository secret `WORKFLOW_BOT_TOKEN` and fail fast with `workflow_bot_token_missing` when absent. +Branch-protection query failures remain deterministic with classified reason codes: +- `branch_protection_query_failed` +- `branch_protection_authentication_missing` +- `branch_protection_authz_denied` `release-guardrails-autoremediate.yml` is scheduled hourly and supports manual dispatch. 
It runs `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` to: - evaluate branch-protection drift and release race-hardening freshness in one pass @@ -521,6 +525,7 @@ Branch-protection workflows prefer `WORKFLOW_BOT_TOKEN` when available and deter - `remediation_execution_failed` - `remediation_verify_failed` - `guardrails_self_heal_runtime_error` +- include `remediation_hints` in the report when guardrails cannot self-heal (for token/authz and stale drill guidance) Guardrails policy is codified in `installer_contract.release_client.ops_control_plane_policy.self_healing.guardrails`: - `remediation_workflow` diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index d9259ef..bf9e480 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -320,8 +320,14 @@ gh workflow run branch-protection-drift-check.yml -R LabVIEW-Community-CI-CD/lab ``` Token policy for branch-protection workflows: -- prefer repository secret `WORKFLOW_BOT_TOKEN` -- deterministic fallback to `github.token` when the secret is unavailable +- require repository secret `WORKFLOW_BOT_TOKEN` +- workflows fail fast with `workflow_bot_token_missing` when the secret is unavailable +- token must include repository administration permissions for branch-protection GraphQL read/apply operations + +Branch-protection query failure reason codes: +- `branch_protection_query_failed` +- `branch_protection_authentication_missing` +- `branch_protection_authz_denied` Local policy verify: @@ -373,6 +379,8 @@ Deterministic guardrails reason codes: - `remediation_verify_failed` - `guardrails_self_heal_runtime_error` +When `reason_code=no_automatable_action` or `reason_code=remediation_verify_failed`, inspect `remediation_hints` in `release-guardrails-autoremediate-report.json` for deterministic next actions. 
+ Guardrails incident title: - `Release Guardrails Auto-Remediation Alert` diff --git a/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 b/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 index cc40226..13338e7 100644 --- a/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 +++ b/scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 @@ -115,6 +115,32 @@ function Format-ReasonCodeSet { return [string]::Join(',', @($normalized)) } +function Get-GuardrailsRemediationHints { + param( + [Parameter()][string[]]$BranchReasonCodes = @(), + [Parameter()][string[]]$RaceReasonCodes = @() + ) + + $hints = [System.Collections.Generic.List[string]]::new() + $normalizedBranchReasons = ConvertTo-StringArray -Value $BranchReasonCodes + $normalizedRaceReasons = ConvertTo-StringArray -Value $RaceReasonCodes + + if (@($normalizedBranchReasons) -contains 'branch_protection_authentication_missing') { + [void]$hints.Add('Configure WORKFLOW_BOT_TOKEN (or GH_TOKEN) with repository administration read/write permissions before rerunning guardrails remediation.') + } + if (@($normalizedBranchReasons) -contains 'branch_protection_authz_denied') { + [void]$hints.Add('Token lacks sufficient repository administration permissions for branch-protection GraphQL operations; rotate/replace WORKFLOW_BOT_TOKEN and rerun.') + } + if (@($normalizedBranchReasons) -contains 'branch_protection_query_failed' -and @($hints).Count -eq 0) { + [void]$hints.Add('Review branch-protection query connectivity/authentication in GitHub Actions logs, then rerun guardrails remediation.') + } + if (@($normalizedRaceReasons) -contains 'drill_run_stale') { + [void]$hints.Add('Dispatch release-race-hardening-drill.yml and confirm a fresh successful run is available before re-evaluating guardrails.') + } + + return @($hints) +} + function Test-ContainsAnyReasonCode { param( [Parameter()][string[]]$Source = @(), @@ -255,6 +281,7 @@ $report = [ordered]@{ status = 'fail' reason_code = '' message = '' + remediation_hints = @() 
initial_assessment = $null remediation_attempts = @() final_assessment = $null @@ -468,7 +495,14 @@ try { $report.reason_code = 'no_automatable_action' $finalBranchReasons = Get-ReasonCodesFromReport -Report $currentBranchAssessment.report $finalRaceReasons = Get-ReasonCodesFromReport -Report $currentRaceAssessment.report + $report.remediation_hints = @( + Get-GuardrailsRemediationHints -BranchReasonCodes @($finalBranchReasons) -RaceReasonCodes @($finalRaceReasons) + ) + $hintText = if (@($report.remediation_hints).Count -gt 0) { " remediation_hints=$([string]::Join(' | ', @($report.remediation_hints)))" } else { '' } $report.message = "No automatable remediation path. branch_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalBranchReasons) race_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalRaceReasons)" + if (-not [string]::IsNullOrWhiteSpace($hintText)) { + $report.message = "$($report.message)$hintText" + } } elseif ($executionFailureCount -gt 0) { $report.status = 'fail' $report.reason_code = 'remediation_execution_failed' @@ -478,7 +512,14 @@ try { $report.reason_code = 'remediation_verify_failed' $finalBranchReasons = Get-ReasonCodesFromReport -Report $currentBranchAssessment.report $finalRaceReasons = Get-ReasonCodesFromReport -Report $currentRaceAssessment.report + $report.remediation_hints = @( + Get-GuardrailsRemediationHints -BranchReasonCodes @($finalBranchReasons) -RaceReasonCodes @($finalRaceReasons) + ) + $hintText = if (@($report.remediation_hints).Count -gt 0) { " remediation_hints=$([string]::Join(' | ', @($report.remediation_hints)))" } else { '' } $report.message = "Guardrails remain failing after bounded remediation. 
branch_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalBranchReasons) race_reason_codes=$(Format-ReasonCodeSet -ReasonCodes $finalRaceReasons)" + if (-not [string]::IsNullOrWhiteSpace($hintText)) { + $report.message = "$($report.message)$hintText" + } } } } diff --git a/scripts/Test-ReleaseBranchProtectionPolicy.ps1 b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 index 569157c..1c732e4 100644 --- a/scripts/Test-ReleaseBranchProtectionPolicy.ps1 +++ b/scripts/Test-ReleaseBranchProtectionPolicy.ps1 @@ -128,6 +128,54 @@ function Test-RuleContract { } } +function Resolve-QueryFailureReasonCodes { + param( + [Parameter()][string]$MessageText = '', + [Parameter()][bool]$GhTokenPresent = $false + ) + + $resolved = [System.Collections.Generic.List[string]]::new() + [void]$resolved.Add('branch_protection_query_failed') + + if (-not $GhTokenPresent) { + [void]$resolved.Add('branch_protection_authentication_missing') + return @($resolved) + } + + $normalized = ([string]$MessageText).ToLowerInvariant() + $authnTokens = @( + 'authentication required', + 'requires authentication', + 'http 401', + 'gh auth login', + 'not logged into any hosts', + 'bad credentials' + ) + $authzTokens = @( + 'resource not accessible by integration', + 'must have admin rights', + 'requires admin access', + 'repository administration', + 'insufficient permissions' + ) + + foreach ($token in $authnTokens) { + if ($normalized.Contains([string]$token)) { + [void]$resolved.Add('branch_protection_authentication_missing') + break + } + } + + foreach ($token in $authzTokens) { + if ($normalized.Contains([string]$token)) { + [void]$resolved.Add('branch_protection_authz_denied') + break + } + } + + return @($resolved) +} + $reasonCodes = [System.Collections.Generic.List[string]]::new() $MainRequiredContexts = Normalize-RequiredContexts -Values @($MainRequiredContexts) @@ -150,6 +198,9 @@ $report = [ordered]@{ main_rule = $null integration_rule = $null } + auth_context = [ordered]@{ + gh_token_present 
= -not [string]::IsNullOrWhiteSpace([string]$env:GH_TOKEN) + } } try { @@ -228,7 +279,12 @@ query($owner:String!, $name:String!) { } catch { if ($reasonCodes.Count -eq 0) { - Add-ReasonCode -Target $reasonCodes -ReasonCode 'branch_protection_query_failed' + $queryFailureReasons = Resolve-QueryFailureReasonCodes ` + -MessageText ([string]$_.Exception.Message) ` + -GhTokenPresent ([bool]$report.auth_context.gh_token_present) + foreach ($reasonCode in @($queryFailureReasons)) { + Add-ReasonCode -Target $reasonCodes -ReasonCode ([string]$reasonCode) + } } $report.status = 'fail' $report.reason_codes = @($reasonCodes) diff --git a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 index 2fab0f8..be1b46f 100644 --- a/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 +++ b/tests/BranchProtectionDriftWorkflowContract.Tests.ps1 @@ -29,10 +29,14 @@ Describe 'Branch protection drift workflow contract' { } It 'verifies policy and publishes a machine-readable drift report' { + $script:workflowContent | Should -Match 'Validate workflow bot token' $script:workflowContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:workflowContent | Should -Match 'branch-protection-drift-report\.json' $script:workflowContent | Should -Match 'Branch Protection Drift Check' $script:workflowContent | Should -Match 'WORKFLOW_BOT_TOKEN' + $script:workflowContent | Should -Match 'workflow_bot_token_missing' + $script:workflowContent | Should -Match 'GH_TOKEN:\s*\${{\s*secrets\.WORKFLOW_BOT_TOKEN\s*}}' + $script:workflowContent | Should -Not -Match 'github\.token' } It 'manages failure and recovery incidents for branch-protection drift' { @@ -52,6 +56,9 @@ Describe 'Branch protection drift workflow contract' { $script:verifyContent | Should -Match 'main_rule_missing' $script:verifyContent | Should -Match 'integration_rule_missing' $script:verifyContent | Should -Match 'branch_protection_query_failed' + 
$script:verifyContent | Should -Match 'branch_protection_authentication_missing' + $script:verifyContent | Should -Match 'branch_protection_authz_denied' + $script:verifyContent | Should -Match 'auth_context' } It 'supports deterministic apply and verification of branch-protection policy' { diff --git a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 index ee422f1..b021313 100644 --- a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 @@ -31,11 +31,15 @@ Describe 'Release guardrails auto-remediation workflow contract' { } It 'executes guardrail runtime and incident lifecycle management' { + $script:workflowContent | Should -Match 'Validate workflow bot token' $script:workflowContent | Should -Match 'Invoke-ReleaseGuardrailsSelfHealing\.ps1' $script:workflowContent | Should -Match 'release-guardrails-autoremediate-report\.json' $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'Release Guardrails Auto-Remediation Alert' $script:workflowContent | Should -Match 'WORKFLOW_BOT_TOKEN' + $script:workflowContent | Should -Match 'workflow_bot_token_missing' + $script:workflowContent | Should -Match 'GH_TOKEN:\s*\${{\s*secrets\.WORKFLOW_BOT_TOKEN\s*}}' + $script:workflowContent | Should -Not -Match 'github\.token' $script:workflowContent | Should -Match '-Mode Fail' $script:workflowContent | Should -Match '-Mode Recover' } @@ -50,6 +54,9 @@ Describe 'Release guardrails auto-remediation workflow contract' { $script:runtimeContent | Should -Match 'drill_run_stale' $script:runtimeContent | Should -Match 'apply_branch_protection_policy' $script:runtimeContent | Should -Match 'dispatch_release_race_hardening_drill' + $script:runtimeContent | Should -Match 'remediation_hints' + $script:runtimeContent | Should -Match 
'branch_protection_authentication_missing' + $script:runtimeContent | Should -Match 'branch_protection_authz_denied' } It 'keeps deterministic self-healing reason codes explicit' { From 7bfbfc7d07de420d2fec9b4b1b399be6c30e7afb Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:46:43 -0800 Subject: [PATCH 51/60] Align governance contract checks with branch-protection baseline --- .github/scripts/Invoke-GovernanceContract.ps1 | 55 ++++++++++++++++--- .github/workflows/governance-contract.yml | 1 + 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/.github/scripts/Invoke-GovernanceContract.ps1 b/.github/scripts/Invoke-GovernanceContract.ps1 index 62ada00..ecd39e3 100644 --- a/.github/scripts/Invoke-GovernanceContract.ps1 +++ b/.github/scripts/Invoke-GovernanceContract.ps1 @@ -37,12 +37,53 @@ if ([string]::IsNullOrWhiteSpace($env:GH_TOKEN)) { throw 'GH token is required. Set GH_ADMIN_TOKEN (preferred) or WORKFLOW_BOT_TOKEN/GH_TOKEN/GITHUB_TOKEN.' } -$requiredContexts = @( - 'CI Pipeline', - 'Workspace Installer Contract', - 'Reproducibility Contract', - 'Provenance Contract' -) +function ConvertTo-BoolOrDefault { + param( + [Parameter()][AllowNull()][string]$Value = '', + [Parameter()][bool]$Default = $false + ) + + if ([string]::IsNullOrWhiteSpace([string]$Value)) { + return $Default + } + + try { + return [System.Convert]::ToBoolean([string]$Value) + } catch { + $normalized = ([string]$Value).Trim().ToLowerInvariant() + if (@('1', 'yes', 'y', 'on') -contains $normalized) { + return $true + } + if (@('0', 'no', 'n', 'off') -contains $normalized) { + return $false + } + return $Default + } +} + +$enableSelfHostedContracts = ConvertTo-BoolOrDefault -Value ([string]$env:ENABLE_SELF_HOSTED_CONTRACTS) -Default $false +$requiredContexts = [System.Collections.Generic.List[string]]::new() +foreach ($context in @( + 'CI Pipeline', + 'Integration Gate', + 'Release Race Hardening Drill' + )) { + if (-not 
$requiredContexts.Contains([string]$context)) { + [void]$requiredContexts.Add([string]$context) + } +} + +if ($enableSelfHostedContracts) { + foreach ($context in @( + 'Workspace Installer Contract', + 'Reproducibility Contract', + 'Provenance Contract' + )) { + if (-not $requiredContexts.Contains([string]$context)) { + [void]$requiredContexts.Add([string]$context) + } + } +} $endpoint = "repos/$RepoSlug/branches/$([uri]::EscapeDataString($Branch))/protection" $response = & gh api $endpoint 2>&1 @@ -68,7 +109,7 @@ if ($null -ne $protection.required_status_checks -and $null -ne $protection.requ $actualContexts = @($protection.required_status_checks.contexts) } -foreach ($context in $requiredContexts) { +foreach ($context in @($requiredContexts)) { if ($actualContexts -notcontains $context) { $issues += "missing required status context: $context" } diff --git a/.github/workflows/governance-contract.yml b/.github/workflows/governance-contract.yml index 1d2243c..84d3c07 100644 --- a/.github/workflows/governance-contract.yml +++ b/.github/workflows/governance-contract.yml @@ -24,6 +24,7 @@ jobs: GH_ADMIN_TOKEN: ${{ secrets.GH_ADMIN_TOKEN }} WORKFLOW_BOT_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} GH_TOKEN: ${{ github.token }} + ENABLE_SELF_HOSTED_CONTRACTS: ${{ vars.ENABLE_SELF_HOSTED_CONTRACTS }} run: | pwsh -NoProfile -File ./.github/scripts/Invoke-GovernanceContract.ps1 ` -RepoSlug '${{ github.repository }}' ` From 4973cfa1284445542555866bc91d16a0a669f07e Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 00:48:15 -0800 Subject: [PATCH 52/60] Make governance PR-review enforcement opt-in --- .github/scripts/Invoke-GovernanceContract.ps1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/Invoke-GovernanceContract.ps1 b/.github/scripts/Invoke-GovernanceContract.ps1 index ecd39e3..b1d61b5 100644 --- a/.github/scripts/Invoke-GovernanceContract.ps1 +++ b/.github/scripts/Invoke-GovernanceContract.ps1 @@ -62,6 +62,7 @@ 
function ConvertTo-BoolOrDefault { } $enableSelfHostedContracts = ConvertTo-BoolOrDefault -Value ([string]$env:ENABLE_SELF_HOSTED_CONTRACTS) -Default $false +$requirePullRequestReviews = ConvertTo-BoolOrDefault -Value ([string]$env:GOVERNANCE_REQUIRE_PR_REVIEWS) -Default $false $requiredContexts = [System.Collections.Generic.List[string]]::new() foreach ($context in @( 'CI Pipeline', @@ -115,7 +116,7 @@ foreach ($context in @($requiredContexts)) { } } -if ($null -eq $protection.required_pull_request_reviews) { +if ($requirePullRequestReviews -and $null -eq $protection.required_pull_request_reviews) { $issues += 'required_pull_request_reviews is not enabled' } From bb8eef165e7f57fdeb60874de75883dffded02fd Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 01:05:24 -0800 Subject: [PATCH 53/60] Scope1: add token drill and SLO alert threshold guardrails --- .github/workflows/ci.yml | 1 + .github/workflows/ops-slo-gate.yml | 40 ++++ .../workflows/workflow-bot-token-drill.yml | 116 ++++++++++ AGENTS.md | 7 + README.md | 14 ++ .../runbooks/release-ops-incident-response.md | 28 ++- scripts/Invoke-OpsIncidentLifecycle.ps1 | 64 +++++- scripts/Invoke-OpsSloSelfHealing.ps1 | 158 +++++++++++++ scripts/Test-PolicyContracts.ps1 | 9 + scripts/Test-ReleaseClientContracts.ps1 | 9 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 9 + scripts/Test-WorkflowBotTokenHealth.ps1 | 207 ++++++++++++++++++ tests/OpsIncidentLifecycleContract.Tests.ps1 | 3 + .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 1 + tests/OpsSloGateWorkflowContract.Tests.ps1 | 7 + tests/ReleaseClientPolicyContract.Tests.ps1 | 11 + tests/ScopeAOpsRunbookContract.Tests.ps1 | 4 + ...lowBotTokenDrillWorkflowContract.Tests.ps1 | 50 +++++ tests/WorkspaceSurfaceContract.Tests.ps1 | 13 ++ .../workspace-governance.json | 17 +- workspace-governance.json | 17 +- 21 files changed, 778 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/workflow-bot-token-drill.yml create mode 100644 
scripts/Test-WorkflowBotTokenHealth.ps1 create mode 100644 tests/WorkflowBotTokenDrillWorkflowContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14edb19..42fbcdd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -82,6 +82,7 @@ jobs: './tests/OpsIncidentLifecycleContract.Tests.ps1', './tests/OpsAutoRemediationWorkflowContract.Tests.ps1', './tests/OpsSloGateWorkflowContract.Tests.ps1', + './tests/WorkflowBotTokenDrillWorkflowContract.Tests.ps1', './tests/OpsPolicyDriftWorkflowContract.Tests.ps1', './tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1', './tests/BranchProtectionDriftWorkflowContract.Tests.ps1', diff --git a/.github/workflows/ops-slo-gate.yml b/.github/workflows/ops-slo-gate.yml index 7760577..6068cff 100644 --- a/.github/workflows/ops-slo-gate.yml +++ b/.github/workflows/ops-slo-gate.yml @@ -35,6 +35,16 @@ on: required: false default: '45' type: string + warning_min_success_rate_pct: + description: Warning severity threshold for lowest workflow success rate. + required: false + default: '99.5' + type: string + critical_min_success_rate_pct: + description: Critical severity threshold for lowest workflow success rate. + required: false + default: '99' + type: string permissions: contents: read @@ -113,6 +123,26 @@ jobs: $selfHealWatchTimeout = $parsedWatchTimeout } + $warningMinSuccessRatePctText = [string]'${{ inputs.warning_min_success_rate_pct }}' + $warningMinSuccessRatePct = 99.5 + if (-not [string]::IsNullOrWhiteSpace($warningMinSuccessRatePctText)) { + $parsedWarningSuccessRate = 0.0 + if (-not [double]::TryParse($warningMinSuccessRatePctText, [ref]$parsedWarningSuccessRate)) { + throw "warning_min_success_rate_pct must be a number. 
actual='$warningMinSuccessRatePctText'" + } + $warningMinSuccessRatePct = $parsedWarningSuccessRate + } + + $criticalMinSuccessRatePctText = [string]'${{ inputs.critical_min_success_rate_pct }}' + $criticalMinSuccessRatePct = 99.0 + if (-not [string]::IsNullOrWhiteSpace($criticalMinSuccessRatePctText)) { + $parsedCriticalSuccessRate = 0.0 + if (-not [double]::TryParse($criticalMinSuccessRatePctText, [ref]$parsedCriticalSuccessRate)) { + throw "critical_min_success_rate_pct must be a number. actual='$criticalMinSuccessRatePctText'" + } + $criticalMinSuccessRatePct = $parsedCriticalSuccessRate + } + & pwsh -NoProfile -File ./scripts/Invoke-OpsSloSelfHealing.ps1 ` -SurfaceRepository '${{ github.repository }}' ` -LookbackDays $lookbackDays ` @@ -121,6 +151,8 @@ jobs: -AutoRemediate:$autoSelfHeal ` -MaxAttempts $selfHealMaxAttempts ` -WatchTimeoutMinutes $selfHealWatchTimeout ` + -WarningMinSuccessRatePct $warningMinSuccessRatePct ` + -CriticalMinSuccessRatePct $criticalMinSuccessRatePct ` -OutputPath $reportPath - name: Upload ops SLO gate report @@ -150,15 +182,21 @@ jobs: $finalReasonCodes = @($report.final_report.reason_codes | ForEach-Object { [string]$_ }) $finalReasonCodeText = if ($finalReasonCodes.Count -gt 0) { [string]::Join(',', $finalReasonCodes) } else { 'unknown' } $attemptCount = @($report.remediation_attempts).Count + $severity = [string]$report.alert_severity + $warningThreshold = [string]$report.alert_thresholds.warning_min_success_rate_pct + $criticalThreshold = [string]$report.alert_thresholds.critical_min_success_rate_pct $body = @" Ops SLO gate failed. 
- Run: $env:RUN_URL + - Alert severity: $severity - Reason code: $($report.reason_code) - Final reason codes: $finalReasonCodeText - Message: $($report.message) - Lookback days: $($report.lookback_days) - Min success rate pct: $($report.min_success_rate_pct) + - Warning min success rate pct: $warningThreshold + - Critical min success rate pct: $criticalThreshold - Auto remediate: $($report.auto_remediate) - Remediation attempts: $attemptCount "@ @@ -187,10 +225,12 @@ jobs: $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop $attemptCount = @($report.remediation_attempts).Count + $severity = [string]$report.alert_severity $body = @" Ops SLO gate recovered. - Run: $env:RUN_URL + - Alert severity: $severity - Reason code: $($report.reason_code) - Message: $($report.message) - Lookback days: $($report.lookback_days) diff --git a/.github/workflows/workflow-bot-token-drill.yml b/.github/workflows/workflow-bot-token-drill.yml new file mode 100644 index 0000000..d0e60e8 --- /dev/null +++ b/.github/workflows/workflow-bot-token-drill.yml @@ -0,0 +1,116 @@ +name: workflow-bot-token-drill + +on: + schedule: + - cron: '5 9 * * 1' + workflow_dispatch: + +permissions: + contents: read + issues: write + actions: read + +jobs: + workflow-bot-token-drill: + name: Workflow Bot Token Drill + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Validate workflow bot token + shell: pwsh + env: + WORKFLOW_BOT_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} + run: | + $ErrorActionPreference = 'Stop' + if ([string]::IsNullOrWhiteSpace($env:WORKFLOW_BOT_TOKEN)) { + throw "workflow_bot_token_missing: Required secret WORKFLOW_BOT_TOKEN is not configured." 
+ } + + - name: Execute workflow bot token drill + shell: pwsh + env: + GH_TOKEN: ${{ secrets.WORKFLOW_BOT_TOKEN }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'workflow-bot-token-drill-report.json' + & pwsh -NoProfile -File ./scripts/Test-WorkflowBotTokenHealth.ps1 ` + -Repository '${{ github.repository }}' ` + -OutputPath $reportPath + + - name: Upload workflow bot token drill report + if: always() + uses: actions/upload-artifact@v4 + with: + name: workflow-bot-token-drill-report-${{ github.run_id }} + path: ${{ runner.temp }}/workflow-bot-token-drill-report.json + if-no-files-found: error + + - name: Update workflow bot token drill issue on failure + if: failure() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Workflow Bot Token Health Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'workflow-bot-token-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "workflow bot token drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Workflow bot token drill failed. 
+ + - Run: $env:RUN_URL + - Repository: $($report.repository) + - Reason codes: $reasonCodeText + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Fail ` + -RunUrl $env:RUN_URL ` + -Body $body + + - name: Close workflow bot token drill issue on recovery + if: success() + shell: pwsh + env: + GH_TOKEN: ${{ github.token }} + REPOSITORY: ${{ github.repository }} + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $title = 'Workflow Bot Token Health Alert' + $reportPath = Join-Path $env:RUNNER_TEMP 'workflow-bot-token-drill-report.json' + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf)) { + throw "workflow bot token drill report missing: $reportPath" + } + + $report = Get-Content -LiteralPath $reportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $reasonCodes = @($report.reason_codes | ForEach-Object { [string]$_ }) + $reasonCodeText = if ($reasonCodes.Count -gt 0) { [string]::Join(',', $reasonCodes) } else { 'unknown' } + $body = @" + Workflow bot token drill recovered. + + - Run: $env:RUN_URL + - Repository: $($report.repository) + - Reason codes: $reasonCodeText + - Message: $($report.message) + "@ + + & pwsh -NoProfile -File ./scripts/Invoke-OpsIncidentLifecycle.ps1 ` + -Repository $env:REPOSITORY ` + -IssueTitle $title ` + -Mode Recover ` + -RunUrl $env:RUN_URL ` + -Body $body diff --git a/AGENTS.md b/AGENTS.md index 01be9fa..68e316d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -261,6 +261,11 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `control_plane_runtime_error` - `.github/workflows/weekly-ops-slo-report.yml` must publish machine-readable SLO evidence generated by `scripts/Write-OpsSloReport.ps1`. 
- `.github/workflows/ops-slo-gate.yml` must enforce deterministic SLO gate policy using `scripts/Invoke-OpsSloSelfHealing.ps1`. +- SLO gate alert thresholds must remain explicit under `ops_control_plane_policy.slo_gate.alert_thresholds`: + - `warning_min_success_rate_pct` + - `critical_min_success_rate_pct` + - `warning_reason_codes` + - `critical_reason_codes` - SLO self-healing reason codes must remain explicit: - `already_healthy` - `remediated` @@ -283,6 +288,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_slo_alert_thresholds_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_guardrails_missing` - `ops_control_plane_stable_window_missing` @@ -313,6 +319,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Race-hardening gate must fail when latest successful drill evidence is missing/stale, `reason_code != drill_passed`, or collision evidence is absent. - `.github/workflows/branch-protection-drift-check.yml` must run `scripts/Test-ReleaseBranchProtectionPolicy.ps1` and maintain incident lifecycle title `Branch Protection Drift Alert`. - `.github/workflows/release-guardrails-autoremediate.yml` must run `scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1` and maintain incident lifecycle title `Release Guardrails Auto-Remediation Alert`. +- `.github/workflows/workflow-bot-token-drill.yml` must run `scripts/Test-WorkflowBotTokenHealth.ps1` and maintain incident lifecycle title `Workflow Bot Token Health Alert`. - Branch-protection query/apply workflows must require repository secret `WORKFLOW_BOT_TOKEN` and fail fast with deterministic `workflow_bot_token_missing` when the secret is not configured. - `scripts/Set-ReleaseBranchProtectionPolicy.ps1` is the deterministic apply path for required-check drift repair. 
- Branch-protection query classifier reason codes must remain explicit: diff --git a/README.md b/README.md index 637d167..1db7701 100644 --- a/README.md +++ b/README.md @@ -431,6 +431,11 @@ Top-level release-control-plane deterministic failure reason codes include: - 7-day lookback by default - 100% success-rate target for `ops-monitoring`, `ops-autoremediate`, and `release-control-plane` - max sync-guard success age of 12 hours +- alert thresholds for severity classification: + - warning minimum workflow success rate: `99.5` + - critical minimum workflow success rate: `99` + - warning reason codes: `workflow_missing_runs`, `workflow_success_rate_below_threshold` + - critical reason codes: `workflow_failure_detected`, `sync_guard_missing`, `sync_guard_stale`, `slo_gate_runtime_error` - bounded self-healing by dispatching `ops-autoremediate.yml` and re-verifying SLO status - deterministic reason codes on failure: - `auto_remediation_disabled` @@ -452,6 +457,7 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_slo_alert_thresholds_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_guardrails_missing` - `ops_control_plane_stable_window_missing` @@ -536,6 +542,14 @@ Guardrails policy is codified in `installer_contract.release_client.ops_control_ Incident lifecycle title for this lane is `Release Guardrails Auto-Remediation Alert`. +`workflow-bot-token-drill.yml` is scheduled weekly and supports manual dispatch. It runs `scripts/Test-WorkflowBotTokenHealth.ps1` to verify that `WORKFLOW_BOT_TOKEN` can execute required control-plane API operations (`repo read`, `actions runners read`, and branch-protection GraphQL read). 
+- deterministic reason codes: + - `token_missing` + - `token_invalid` + - `token_scope_insufficient` + - `token_health_runtime_error` +- incident lifecycle title for this lane: `Workflow Bot Token Health Alert` + ## Local Docker package for control-plane exercise Run the local Docker harness (safe default, validate + dry-run): diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index bf9e480..c155281 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -206,9 +206,18 @@ Run with explicit thresholds: gh workflow run ops-slo-gate.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork ` -f lookback_days=7 ` -f min_success_rate_pct=100 ` - -f sync_guard_max_age_hours=12 + -f sync_guard_max_age_hours=12 ` + -f warning_min_success_rate_pct=99.5 ` + -f critical_min_success_rate_pct=99 ``` +SLO report severity fields: +- `alert_severity` (`none|warning|critical`) +- `alert_thresholds.warning_min_success_rate_pct` +- `alert_thresholds.critical_min_success_rate_pct` +- `alert_thresholds.warning_reason_codes` +- `alert_thresholds.critical_reason_codes` + ## Policy Drift Check Dispatch Run control-plane policy drift check: @@ -384,6 +393,22 @@ When `reason_code=no_automatable_action` or `reason_code=remediation_verify_fail Guardrails incident title: - `Release Guardrails Auto-Remediation Alert` +## Workflow Bot Token Drill +Dispatch token-health drill: + +```powershell +gh workflow run workflow-bot-token-drill.yml -R LabVIEW-Community-CI-CD/labview-cdev-surface-fork +``` + +Deterministic token drill reason codes: +- `token_missing` +- `token_invalid` +- `token_scope_insufficient` +- `token_health_runtime_error` + +Token drill incident title: +- `Workflow Bot Token Health Alert` + ## Evidence to Attach to Incident - `ops-monitoring-report.json` - `canary-smoke-tag-hygiene-report.json` @@ -393,5 +418,6 @@ Guardrails incident title: - 
`release-race-hardening-gate-report.json` - `branch-protection-drift-report.json` - `release-guardrails-autoremediate-report.json` +- `workflow-bot-token-drill-report.json` - sync guard run URL - parity SHAs (upstream and fork) diff --git a/scripts/Invoke-OpsIncidentLifecycle.ps1 b/scripts/Invoke-OpsIncidentLifecycle.ps1 index 686e452..71993e5 100644 --- a/scripts/Invoke-OpsIncidentLifecycle.ps1 +++ b/scripts/Invoke-OpsIncidentLifecycle.ps1 @@ -64,6 +64,50 @@ Ops incident recovered. "@ } +function Normalize-IncidentBody { + param( + [Parameter()][AllowNull()][string]$Text = '' + ) + + $normalized = [string]$Text + $normalized = $normalized -replace "`r`n", "`n" + $normalized = $normalized -replace "`r", "`n" + + $lines = [System.Collections.Generic.List[string]]::new() + foreach ($line in @($normalized -split "`n", -1, [System.StringSplitOptions]::None)) { + [void]$lines.Add(([string]$line).TrimEnd()) + } + + $compacted = [System.Collections.Generic.List[string]]::new() + $blankStreak = 0 + foreach ($line in @($lines)) { + if ([string]::IsNullOrWhiteSpace([string]$line)) { + $blankStreak++ + if ($blankStreak -gt 1) { + continue + } + [void]$compacted.Add('') + continue + } + + $blankStreak = 0 + [void]$compacted.Add([string]$line) + } + + while (@($compacted).Count -gt 0 -and [string]::IsNullOrWhiteSpace([string]$compacted[0])) { + $compacted.RemoveAt(0) + } + while (@($compacted).Count -gt 0 -and [string]::IsNullOrWhiteSpace([string]$compacted[@($compacted).Count - 1])) { + $compacted.RemoveAt(@($compacted).Count - 1) + } + + if (@($compacted).Count -eq 0) { + return '' + } + + return ([string]::Join("`n", @($compacted)) + "`n") +} + $report = [ordered]@{ schema_version = '1.0' timestamp_utc = Get-UtcNowIso @@ -73,12 +117,24 @@ $report = [ordered]@{ run_url = $RunUrl status = 'fail' action = '' + body_line_count = 0 + body_sha256 = '' issue = $null message = '' } try { $resolvedBody = Resolve-Body -LifecycleMode $Mode -Text $Body -Url $RunUrl + $normalizedBody = 
Normalize-IncidentBody -Text $resolvedBody + $bodyLines = @( + $normalizedBody -split "`n", -1, [System.StringSplitOptions]::None | + Where-Object { -not [string]::IsNullOrWhiteSpace([string]$_) } + ) + $report.body_line_count = @($bodyLines).Count + $report.body_sha256 = [System.BitConverter]::ToString( + [System.Security.Cryptography.SHA256]::Create().ComputeHash([System.Text.Encoding]::UTF8.GetBytes($normalizedBody)) + ).Replace('-', '').ToLowerInvariant() + $issues = @(Invoke-GhJson -Arguments @( 'issue', 'list', '-R', $Repository, @@ -96,7 +152,7 @@ try { 'issue', 'create', '-R', $Repository, '--title', $IssueTitle, - '--body', $resolvedBody + '--body', $normalizedBody )).Trim() $report.action = 'created' $report.issue = [ordered]@{ @@ -113,7 +169,7 @@ try { if ($issueState -eq 'CLOSED') { Invoke-Gh -Arguments @('issue', 'reopen', $issueNumber, '-R', $Repository) - Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $normalizedBody) $report.action = 'reopened_and_commented' $report.issue = [ordered]@{ number = $issueNumber @@ -123,7 +179,7 @@ try { } $report.message = "Closed incident issue reopened and updated (#$issueNumber)." 
} else { - Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $normalizedBody) $report.action = 'commented' $report.issue = [ordered]@{ number = $issueNumber @@ -150,7 +206,7 @@ try { $issueUrl = [string]$target[0].url if ($issueState -eq 'OPEN') { - Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $resolvedBody) + Invoke-Gh -Arguments @('issue', 'comment', $issueNumber, '-R', $Repository, '--body', $normalizedBody) Invoke-Gh -Arguments @('issue', 'close', $issueNumber, '-R', $Repository) $report.action = 'commented_and_closed' $report.issue = [ordered]@{ diff --git a/scripts/Invoke-OpsSloSelfHealing.ps1 b/scripts/Invoke-OpsSloSelfHealing.ps1 index fd81426..c91417b 100644 --- a/scripts/Invoke-OpsSloSelfHealing.ps1 +++ b/scripts/Invoke-OpsSloSelfHealing.ps1 @@ -36,6 +36,30 @@ param( [ValidateRange(5, 240)] [int]$WatchTimeoutMinutes = 45, + [Parameter()] + [ValidateRange(0, 100)] + [double]$WarningMinSuccessRatePct = 99.5, + + [Parameter()] + [ValidateRange(0, 100)] + [double]$CriticalMinSuccessRatePct = 99.0, + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string[]]$WarningReasonCodes = @( + 'workflow_missing_runs', + 'workflow_success_rate_below_threshold' + ), + + [Parameter()] + [ValidateNotNullOrEmpty()] + [string[]]$CriticalReasonCodes = @( + 'workflow_failure_detected', + 'sync_guard_missing', + 'sync_guard_stale', + 'slo_gate_runtime_error' + ), + [Parameter()] [string]$OutputPath = '' ) @@ -55,6 +79,116 @@ foreach ($requiredScript in @($sloGateScript, $dispatchWorkflowScript, $watchWor } } +function ConvertTo-StringArray { + param([Parameter()][AllowNull()]$Value) + + if ($null -eq $Value) { + return @() + } + + if ($Value -is [string]) { + if ([string]::IsNullOrWhiteSpace([string]$Value)) { + return @() + } + return @([string]$Value) + } + + $items = 
[System.Collections.Generic.List[string]]::new() + foreach ($entry in @($Value)) { + $text = [string]$entry + if ([string]::IsNullOrWhiteSpace($text)) { + continue + } + if (-not $items.Contains($text)) { + [void]$items.Add($text) + } + } + + return @($items) +} + +function Test-ContainsAnyReasonCode { + param( + [Parameter()][string[]]$Source = @(), + [Parameter()][string[]]$Candidates = @() + ) + + $normalizedSource = ConvertTo-StringArray -Value $Source + foreach ($reason in @($normalizedSource)) { + if (@($Candidates) -contains [string]$reason) { + return $true + } + } + + return $false +} + +function Get-LowestWorkflowSuccessRate { + param([Parameter()][AllowNull()]$GateReport) + + if ($null -eq $GateReport) { + return $null + } + + $evaluations = @($GateReport.workflow_evaluations) + if (@($evaluations).Count -eq 0) { + return $null + } + + $lowest = $null + foreach ($evaluation in @($evaluations)) { + $value = $null + try { + $value = [double]$evaluation.success_rate_pct + } catch { + $value = $null + } + + if ($null -eq $value) { + continue + } + + if ($null -eq $lowest -or [double]$value -lt [double]$lowest) { + $lowest = [double]$value + } + } + + return $lowest +} + +function Resolve-OpsSloAlertSeverity { + param( + [Parameter()][string]$OverallStatus = 'fail', + [Parameter()][AllowNull()]$GateReport = $null, + [Parameter()][double]$WarningThreshold = 99.5, + [Parameter()][double]$CriticalThreshold = 99.0, + [Parameter()][string[]]$WarningCodes = @(), + [Parameter()][string[]]$CriticalCodes = @() + ) + + if ([string]$OverallStatus -eq 'pass') { + return 'none' + } + + $reasonCodes = ConvertTo-StringArray -Value @($GateReport.reason_codes) + if (Test-ContainsAnyReasonCode -Source @($reasonCodes) -Candidates @($CriticalCodes)) { + return 'critical' + } + if (Test-ContainsAnyReasonCode -Source @($reasonCodes) -Candidates @($WarningCodes)) { + return 'warning' + } + + $lowestSuccessRate = Get-LowestWorkflowSuccessRate -GateReport $GateReport + if ($null -ne 
$lowestSuccessRate -and [double]$lowestSuccessRate -lt $CriticalThreshold) { + return 'critical' + } + if ($null -ne $lowestSuccessRate -and [double]$lowestSuccessRate -lt $WarningThreshold) { + return 'warning' + } + + return 'warning' +} + function Invoke-SloGateAssessment { param( [Parameter(Mandatory = $true)][string]$ScriptPath, @@ -123,6 +257,13 @@ $report = [ordered]@{ remediation_branch = $RemediationBranch max_attempts = $MaxAttempts watch_timeout_minutes = $WatchTimeoutMinutes + alert_thresholds = [ordered]@{ + warning_min_success_rate_pct = $WarningMinSuccessRatePct + critical_min_success_rate_pct = $CriticalMinSuccessRatePct + warning_reason_codes = @($WarningReasonCodes) + critical_reason_codes = @($CriticalReasonCodes) + } + alert_severity = 'none' status = 'fail' reason_code = '' message = '' @@ -265,10 +406,27 @@ try { } } } + + $severityGateReport = if ($null -ne $report.final_report) { $report.final_report } else { $report.initial_report } + $report.alert_severity = Resolve-OpsSloAlertSeverity ` + -OverallStatus ([string]$report.status) ` + -GateReport $severityGateReport ` + -WarningThreshold $WarningMinSuccessRatePct ` + -CriticalThreshold $CriticalMinSuccessRatePct ` + -WarningCodes @($WarningReasonCodes) ` + -CriticalCodes @($CriticalReasonCodes) } catch { $report.status = 'fail' $report.reason_code = 'slo_self_heal_runtime_error' $report.message = [string]$_.Exception.Message + $severityGateReport = if ($null -ne $report.final_report) { $report.final_report } else { $report.initial_report } + $report.alert_severity = Resolve-OpsSloAlertSeverity ` + -OverallStatus ([string]$report.status) ` + -GateReport $severityGateReport ` + -WarningThreshold $WarningMinSuccessRatePct ` + -CriticalThreshold $CriticalMinSuccessRatePct ` + -WarningCodes @($WarningReasonCodes) ` + -CriticalCodes @($CriticalReasonCodes) } finally { Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null diff --git a/scripts/Test-PolicyContracts.ps1 
b/scripts/Test-PolicyContracts.ps1 index 296be2e..ad317e7 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -172,12 +172,21 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_warning_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct -eq 99.5) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct -eq 99) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_warning_reason_workflow_missing_runs' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_missing_runs') -Detail 
([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_warning_reason_workflow_success_rate_below_threshold' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_success_rate_below_threshold') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_workflow_failure_detected' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'workflow_failure_detected') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_sync_guard_missing' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_missing') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_sync_guard_stale' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_stale') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'slo_gate_runtime_error') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_auto_close' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_reopen' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail ([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_incident_title_release_guardrails' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Scope 'manifest' -Name 
'release_client_ops_policy_incident_title_workflow_bot_token_health' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Workflow Bot Token Health Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 2fab11d..1f33622 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -95,6 +95,14 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) Add-Check -Name 'ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) Add-Check -Name 'ops_policy_slo_max_sync_guard_age_hours' 
-Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) + Add-Check -Name 'ops_policy_slo_alert_thresholds_warning_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct -eq 99.5) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct) + Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct -eq 99) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct) + Add-Check -Name 'ops_policy_slo_alert_thresholds_warning_reason_workflow_missing_runs' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_missing_runs') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes))) + Add-Check -Name 'ops_policy_slo_alert_thresholds_warning_reason_workflow_success_rate_below_threshold' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_success_rate_below_threshold') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes))) + Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_workflow_failure_detected' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'workflow_failure_detected') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_sync_guard_missing' -Passed 
(@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_missing') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_sync_guard_stale' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_stale') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'slo_gate_runtime_error') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Name 'ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) @@ -102,6 +110,7 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_incident_reopen_on_regression' -Passed ([bool]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) -Detail 
([string]$releaseClient.ops_control_plane_policy.incident_lifecycle.reopen_on_regression) Add-Check -Name 'ops_policy_incident_title_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Control Plane Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) Add-Check -Name 'ops_policy_incident_title_release_guardrails' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) + Add-Check -Name 'ops_policy_incident_title_workflow_bot_token_health' -Passed (@($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Workflow Bot Token Health Alert') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles))) Add-Check -Name 'ops_policy_tag_strategy_mode' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode -eq 'dual-mode-semver-preferred') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.mode) Add-Check -Name 'ops_policy_tag_strategy_legacy_tag_family' -Passed ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family -eq 'legacy_date_window') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.legacy_tag_family) Add-Check -Name 'ops_policy_tag_strategy_semver_only_enforce' -Passed (([DateTime]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') -eq '2026-07-01T00:00:00Z') -Detail ([string]$releaseClient.ops_control_plane_policy.tag_strategy.semver_only_enforce_utc) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index d99ab94..129c5d9 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ 
b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -92,6 +92,15 @@ try { if (-not $opsPolicyPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_policy_missing' } else { + $sloAlertThresholdsPresent = ($null -ne $releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_slo_alert_thresholds_present' + passed = $sloAlertThresholdsPresent + }) | Out-Null + if (-not $sloAlertThresholdsPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_slo_alert_thresholds_missing' + } + $selfHealingPresent = ($null -ne $releaseClient.ops_control_plane_policy.self_healing) $checks.Add([ordered]@{ check = 'release_client_ops_control_plane_policy_self_healing_present' diff --git a/scripts/Test-WorkflowBotTokenHealth.ps1 b/scripts/Test-WorkflowBotTokenHealth.ps1 new file mode 100644 index 0000000..4771396 --- /dev/null +++ b/scripts/Test-WorkflowBotTokenHealth.ps1 @@ -0,0 +1,207 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter()] + [ValidatePattern('^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$')] + [string]$Repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork', + + [Parameter()] + [string]$OutputPath = '' +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. 
(Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Add-ReasonCode { + param( + [Parameter(Mandatory = $true)][AllowEmptyCollection()][System.Collections.Generic.List[string]]$Target, + [Parameter(Mandatory = $true)][string]$ReasonCode + ) + + if (-not $Target.Contains($ReasonCode)) { + [void]$Target.Add($ReasonCode) + } +} + +function Resolve-TokenFailureReason { + param( + [Parameter()][string]$MessageText = '' + ) + + $normalized = ([string]$MessageText).ToLowerInvariant() + foreach ($token in @( + 'bad credentials', + 'authentication required', + 'requires authentication', + 'not logged into any hosts', + 'http 401' + )) { + if ($normalized.Contains([string]$token)) { + return 'token_invalid' + } + } + + foreach ($token in @( + 'resource not accessible by integration', + 'insufficient permissions', + 'must have admin rights', + 'requires admin access', + 'http 403', + 'forbidden' + )) { + if ($normalized.Contains([string]$token)) { + return 'token_scope_insufficient' + } + } + + return 'token_health_runtime_error' +} + +function Invoke-TokenCheck { + param( + [Parameter(Mandatory = $true)][string]$Name, + [Parameter(Mandatory = $true)][ScriptBlock]$Action + ) + + try { + & $Action | Out-Null + return [pscustomobject]@{ + name = $Name + status = 'pass' + message = 'ok' + reason_code = 'ok' + } + } catch { + $message = [string]$_.Exception.Message + $reasonCode = Resolve-TokenFailureReason -MessageText $message + return [pscustomobject]@{ + name = $Name + status = 'fail' + message = $message + reason_code = $reasonCode + } + } +} + +$report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + repository = $Repository + status = 'fail' + reason_codes = @() + message = '' + checks = @() +} + +$reasonCodes = [System.Collections.Generic.List[string]]::new() +$checks = [System.Collections.Generic.List[object]]::new() + +try { + if ([string]::IsNullOrWhiteSpace([string]$env:GH_TOKEN)) { + Add-ReasonCode -Target $reasonCodes 
-ReasonCode 'token_missing' + $checks.Add([ordered]@{ + name = 'token_present' + status = 'fail' + message = 'GH_TOKEN is not set in environment.' + reason_code = 'token_missing' + }) | Out-Null + } else { + $checks.Add([ordered]@{ + name = 'token_present' + status = 'pass' + message = 'GH_TOKEN is present.' + reason_code = 'ok' + }) | Out-Null + + $repoParts = $Repository.Split('/') + if ($repoParts.Count -ne 2) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'repository_invalid' + $checks.Add([ordered]@{ + name = 'repository_slug' + status = 'fail' + message = "Repository slug is invalid: $Repository" + reason_code = 'repository_invalid' + }) | Out-Null + } else { + $owner = [string]$repoParts[0] + $name = [string]$repoParts[1] + $branchProtectionQuery = @' +query($owner:String!, $name:String!) { + repository(owner:$owner, name:$name) { + branchProtectionRules(first:5) { + nodes { + pattern + } + } + } +} +'@ + + $checkResults = @( + Invoke-TokenCheck -Name 'viewer_query' -Action { + Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', 'query=query { viewer { login } }' + ) + }, + Invoke-TokenCheck -Name 'repo_read' -Action { + Invoke-GhJson -Arguments @( + 'api', "repos/$Repository" + ) + }, + Invoke-TokenCheck -Name 'actions_runners_read' -Action { + Invoke-GhJson -Arguments @( + 'api', "repos/$Repository/actions/runners?per_page=1" + ) + }, + Invoke-TokenCheck -Name 'branch_protection_graphql_read' -Action { + Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $branchProtectionQuery), + '-F', ("owner={0}" -f $owner), + '-F', ("name={0}" -f $name) + ) + } + ) + + foreach ($entry in @($checkResults)) { + $checks.Add([ordered]@{ + name = [string]$entry.name + status = [string]$entry.status + message = [string]$entry.message + reason_code = [string]$entry.reason_code + }) | Out-Null + if ([string]$entry.status -ne 'pass' -and [string]$entry.reason_code -ne 'ok') { + Add-ReasonCode -Target $reasonCodes -ReasonCode 
([string]$entry.reason_code) + } + } + } + } + + $report.checks = @($checks) + if ($reasonCodes.Count -eq 0) { + $report.status = 'pass' + $report.reason_codes = @('ok') + $report.message = 'Workflow bot token health checks passed.' + } else { + $report.status = 'fail' + $report.reason_codes = @($reasonCodes) + $report.message = "Workflow bot token health checks failed. reason_codes=$([string]::Join(',', @($reasonCodes)))" + } +} +catch { + $report.status = 'fail' + $report.reason_codes = @('token_health_runtime_error') + $report.message = [string]$_.Exception.Message +} +finally { + Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null +} + +if ([string]$report.status -eq 'pass') { + exit 0 +} + +exit 1 diff --git a/tests/OpsIncidentLifecycleContract.Tests.ps1 b/tests/OpsIncidentLifecycleContract.Tests.ps1 index 8061e1b..793a74b 100644 --- a/tests/OpsIncidentLifecycleContract.Tests.ps1 +++ b/tests/OpsIncidentLifecycleContract.Tests.ps1 @@ -34,5 +34,8 @@ Describe 'Ops incident lifecycle contract' { $script:scriptContent | Should -Match 'schema_version' $script:scriptContent | Should -Match 'Write-WorkflowOpsReport' $script:scriptContent | Should -Match 'runtime_error' + $script:scriptContent | Should -Match 'Normalize-IncidentBody' + $script:scriptContent | Should -Match 'body_line_count' + $script:scriptContent | Should -Match 'body_sha256' } } diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 664d1cd..4c83ed2 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -39,6 +39,7 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'release_client_drift' $script:runtimeContent | Should -Match 'runtime_images_missing' $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_slo_alert_thresholds_missing' 
$script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' $script:runtimeContent | Should -Match 'ops_control_plane_guardrails_missing' $script:runtimeContent | Should -Match 'ops_control_plane_stable_window_missing' diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 b/tests/OpsSloGateWorkflowContract.Tests.ps1 index 25b0319..0a3a039 100644 --- a/tests/OpsSloGateWorkflowContract.Tests.ps1 +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -30,6 +30,8 @@ Describe 'Ops SLO gate workflow contract' { $script:workflowContent | Should -Match 'auto_self_heal' $script:workflowContent | Should -Match 'self_heal_max_attempts' $script:workflowContent | Should -Match 'self_heal_watch_timeout_minutes' + $script:workflowContent | Should -Match 'warning_min_success_rate_pct' + $script:workflowContent | Should -Match 'critical_min_success_rate_pct' } It 'runs self-healing SLO runtime, uploads report, and manages incident lifecycle' { @@ -62,6 +64,11 @@ Describe 'Ops SLO gate workflow contract' { $script:selfHealingContent | Should -Match '\$dispatchInputs = @\(' $script:selfHealingContent | Should -Match '-Inputs \$dispatchInputs' $script:selfHealingContent | Should -Match 'sync_guard_max_age_hours' + $script:selfHealingContent | Should -Match 'warning_min_success_rate_pct' + $script:selfHealingContent | Should -Match 'critical_min_success_rate_pct' + $script:selfHealingContent | Should -Match 'alert_severity' + $script:selfHealingContent | Should -Match 'critical_reason_codes' + $script:selfHealingContent | Should -Match 'warning_reason_codes' $script:selfHealingContent | Should -Match 'already_healthy' $script:selfHealingContent | Should -Match 'remediated' $script:selfHealingContent | Should -Match 'auto_remediation_disabled' diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 30a1045..272c843 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ 
b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -55,6 +55,14 @@ Describe 'Release client policy contract' { $releaseClient.ops_control_plane_policy.slo_gate.lookback_days | Should -Be 7 $releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct | Should -Be 100 $releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours | Should -Be 12 + $releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct | Should -Be 99.5 + $releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct | Should -Be 99 + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) | Should -Contain 'workflow_missing_runs' + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) | Should -Contain 'workflow_success_rate_below_threshold' + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'workflow_failure_detected' + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'sync_guard_missing' + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'sync_guard_stale' + @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'slo_gate_runtime_error' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-monitoring' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-autoremediate' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'release-control-plane' @@ -75,6 +83,7 @@ Describe 'Release client policy contract' { @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Ops Policy Drift Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should 
-Contain 'Release Guardrails Auto-Remediation Alert' @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Release Rollback Drill Alert' + @($releaseClient.ops_control_plane_policy.incident_lifecycle.titles) | Should -Contain 'Workflow Bot Token Health Alert' $releaseClient.ops_control_plane_policy.self_healing.enabled | Should -BeTrue $releaseClient.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 $releaseClient.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' @@ -110,6 +119,8 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' $script:policyScriptContent | Should -Match 'ops_policy_slo_min_success_rate_pct' + $script:policyScriptContent | Should -Match 'ops_policy_slo_alert_thresholds_warning_min_success_rate_pct' + $script:policyScriptContent | Should -Match 'ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_full_cycle_weekday_monday' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_pattern_exists' diff --git a/tests/ScopeAOpsRunbookContract.Tests.ps1 b/tests/ScopeAOpsRunbookContract.Tests.ps1 index c0f9a4e..31b458f 100644 --- a/tests/ScopeAOpsRunbookContract.Tests.ps1 +++ b/tests/ScopeAOpsRunbookContract.Tests.ps1 @@ -28,6 +28,7 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'Invoke-CanarySmokeTagHygiene\.ps1' $script:runbookContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:runbookContent | Should -Match 'ops-slo-gate\.yml' + $script:runbookContent | Should -Match 'workflow-bot-token-drill\.yml' $script:runbookContent | Should -Match 
'ops-policy-drift-check\.yml' $script:runbookContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:runbookContent | Should -Match 'release-rollback-drill\.yml' @@ -47,6 +48,7 @@ Describe 'Scope A ops runbook contract' { $script:runbookContent | Should -Match 'Release Race Hardening Drill Alert' $script:runbookContent | Should -Match 'Branch Protection Drift Alert' $script:runbookContent | Should -Match 'Release Guardrails Auto-Remediation Alert' + $script:runbookContent | Should -Match 'Workflow Bot Token Health Alert' $script:runbookContent | Should -Match 'release-race-hardening-weekly-summary\.json' $script:runbookContent | Should -Match 'promotion_lineage_invalid' $script:runbookContent | Should -Match 'stable_window_override_invalid' @@ -62,6 +64,7 @@ Describe 'Scope A ops runbook contract' { $script:readmeContent | Should -Match 'ops-monitoring\.yml' $script:readmeContent | Should -Match 'canary-smoke-tag-hygiene\.yml' $script:readmeContent | Should -Match 'ops-slo-gate\.yml' + $script:readmeContent | Should -Match 'workflow-bot-token-drill\.yml' $script:readmeContent | Should -Match 'ops-policy-drift-check\.yml' $script:readmeContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:readmeContent | Should -Match 'release-rollback-drill\.yml' @@ -85,6 +88,7 @@ Describe 'Scope A ops runbook contract' { $script:agentsContent | Should -Match 'sync_guard_failed' $script:agentsContent | Should -Match 'canary-smoke-tag-hygiene\.yml' $script:agentsContent | Should -Match 'ops-slo-gate\.yml' + $script:agentsContent | Should -Match 'workflow-bot-token-drill\.yml' $script:agentsContent | Should -Match 'ops-policy-drift-check\.yml' $script:agentsContent | Should -Match 'release-guardrails-autoremediate\.yml' $script:agentsContent | Should -Match 'release-rollback-drill\.yml' diff --git a/tests/WorkflowBotTokenDrillWorkflowContract.Tests.ps1 b/tests/WorkflowBotTokenDrillWorkflowContract.Tests.ps1 new file mode 100644 index 0000000..c65ab89 
--- /dev/null +++ b/tests/WorkflowBotTokenDrillWorkflowContract.Tests.ps1 @@ -0,0 +1,50 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Workflow bot token drill contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:workflowPath = Join-Path $script:repoRoot '.github/workflows/workflow-bot-token-drill.yml' + $script:runtimePath = Join-Path $script:repoRoot 'scripts/Test-WorkflowBotTokenHealth.ps1' + + foreach ($path in @($script:workflowPath, $script:runtimePath)) { + if (-not (Test-Path -LiteralPath $path -PathType Leaf)) { + throw "Workflow bot token drill contract file missing: $path" + } + } + + $script:workflowContent = Get-Content -LiteralPath $script:workflowPath -Raw + $script:runtimeContent = Get-Content -LiteralPath $script:runtimePath -Raw + } + + It 'is scheduled and dispatchable with explicit workflow bot token preflight' { + $script:workflowContent | Should -Match 'schedule:' + $script:workflowContent | Should -Match 'workflow_dispatch:' + $script:workflowContent | Should -Match 'WORKFLOW_BOT_TOKEN' + $script:workflowContent | Should -Match 'workflow_bot_token_missing' + } + + It 'runs token health checks, publishes a report, and manages incidents' { + $script:workflowContent | Should -Match 'Test-WorkflowBotTokenHealth\.ps1' + $script:workflowContent | Should -Match 'workflow-bot-token-drill-report\.json' + $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' + $script:workflowContent | Should -Match 'Workflow Bot Token Health Alert' + $script:workflowContent | Should -Match '-Mode Fail' + $script:workflowContent | Should -Match '-Mode Recover' + } + + It 'keeps deterministic token health reason codes explicit' { + foreach ($reasonCode in @( + 'ok', + 'token_missing', + 'token_invalid', + 'token_scope_insufficient', + 'token_health_runtime_error' + )) { + $pattern = [regex]::Escape($reasonCode) + $script:runtimeContent 
| Should -Match $pattern + } + } +} diff --git a/tests/WorkspaceSurfaceContract.Tests.ps1 b/tests/WorkspaceSurfaceContract.Tests.ps1 index 1a53fe2..976ff07 100644 --- a/tests/WorkspaceSurfaceContract.Tests.ps1 +++ b/tests/WorkspaceSurfaceContract.Tests.ps1 @@ -33,6 +33,7 @@ Describe 'Workspace surface contract' { $script:opsIncidentLifecycleScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsIncidentLifecycle.ps1' $script:opsSloGateScriptPath = Join-Path $script:repoRoot 'scripts/Test-OpsSloGate.ps1' $script:opsSloSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-OpsSloSelfHealing.ps1' + $script:workflowBotTokenHealthScriptPath = Join-Path $script:repoRoot 'scripts/Test-WorkflowBotTokenHealth.ps1' $script:opsPolicyDriftScriptPath = Join-Path $script:repoRoot 'scripts/Test-ReleaseControlPlanePolicyDrift.ps1' $script:rollbackDrillScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-ReleaseRollbackDrill.ps1' $script:rollbackSelfHealingScriptPath = Join-Path $script:repoRoot 'scripts/Invoke-RollbackDrillSelfHealing.ps1' @@ -58,6 +59,7 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath = Join-Path $script:repoRoot '.github/workflows/nightly-supplychain-canary.yml' $script:opsSloGateWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-slo-gate.yml' $script:opsPolicyDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/ops-policy-drift-check.yml' + $script:workflowBotTokenDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/workflow-bot-token-drill.yml' $script:releaseGuardrailsAutoRemediationWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-guardrails-autoremediate.yml' $script:branchProtectionDriftWorkflowPath = Join-Path $script:repoRoot '.github/workflows/branch-protection-drift-check.yml' $script:rollbackDrillWorkflowPath = Join-Path $script:repoRoot '.github/workflows/release-rollback-drill.yml' @@ -109,6 +111,7 @@ Describe 'Workspace surface contract' { 
$script:opsIncidentLifecycleScriptPath, $script:opsSloGateScriptPath, $script:opsSloSelfHealingScriptPath, + $script:workflowBotTokenHealthScriptPath, $script:opsPolicyDriftScriptPath, $script:rollbackDrillScriptPath, $script:rollbackSelfHealingScriptPath, @@ -134,6 +137,7 @@ Describe 'Workspace surface contract' { $script:canaryWorkflowPath, $script:opsSloGateWorkflowPath, $script:opsPolicyDriftWorkflowPath, + $script:workflowBotTokenDrillWorkflowPath, $script:releaseGuardrailsAutoRemediationWorkflowPath, $script:branchProtectionDriftWorkflowPath, $script:rollbackDrillWorkflowPath, @@ -295,6 +299,14 @@ Describe 'Workspace surface contract' { (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') | Should -BeTrue + $script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.warning_min_success_rate_pct | Should -Be 99.5 + $script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.critical_min_success_rate_pct | Should -Be 99 + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_missing_runs') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.warning_reason_codes) -contains 'workflow_success_rate_below_threshold') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'workflow_failure_detected') | Should -BeTrue + 
(@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_missing') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_stale') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'slo_gate_runtime_error') | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.auto_close_on_recovery | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.reopen_on_regression | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.tag_strategy.mode | Should -Be 'dual-mode-semver-preferred' @@ -312,6 +324,7 @@ Describe 'Workspace surface contract' { (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Ops Policy Drift Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Guardrails Auto-Remediation Alert') | Should -BeTrue (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Release Rollback Drill Alert') | Should -BeTrue + (@($script:manifest.installer_contract.release_client.ops_control_plane_policy.incident_lifecycle.titles) -contains 'Workflow Bot Token Health Alert') | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.enabled | Should -BeTrue $script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.max_attempts | Should -Be 1 
$script:manifest.installer_contract.release_client.ops_control_plane_policy.self_healing.slo_gate.remediation_workflow | Should -Be 'ops-autoremediate.yml' diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 66b2ca1..7346be1 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -258,6 +258,20 @@ "lookback_days": 7, "min_success_rate_pct": 100, "max_sync_guard_age_hours": 12, + "alert_thresholds": { + "warning_min_success_rate_pct": 99.5, + "critical_min_success_rate_pct": 99, + "warning_reason_codes": [ + "workflow_missing_runs", + "workflow_success_rate_below_threshold" + ], + "critical_reason_codes": [ + "workflow_failure_detected", + "sync_guard_missing", + "sync_guard_stale", + "slo_gate_runtime_error" + ] + }, "required_workflows": [ "ops-monitoring", "ops-autoremediate", @@ -274,7 +288,8 @@ "Ops SLO Gate Alert", "Ops Policy Drift Alert", "Release Guardrails Auto-Remediation Alert", - "Release Rollback Drill Alert" + "Release Rollback Drill Alert", + "Workflow Bot Token Health Alert" ] }, "tag_strategy": { diff --git a/workspace-governance.json b/workspace-governance.json index 66b2ca1..7346be1 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -258,6 +258,20 @@ "lookback_days": 7, "min_success_rate_pct": 100, "max_sync_guard_age_hours": 12, + "alert_thresholds": { + "warning_min_success_rate_pct": 99.5, + "critical_min_success_rate_pct": 99, + "warning_reason_codes": [ + "workflow_missing_runs", + "workflow_success_rate_below_threshold" + ], + "critical_reason_codes": [ + "workflow_failure_detected", + "sync_guard_missing", + "sync_guard_stale", + "slo_gate_runtime_error" + ] + }, "required_workflows": [ "ops-monitoring", "ops-autoremediate", @@ -274,7 +288,8 @@ "Ops SLO Gate Alert", "Ops Policy 
Drift Alert", "Release Guardrails Auto-Remediation Alert", - "Release Rollback Drill Alert" + "Release Rollback Drill Alert", + "Workflow Bot Token Health Alert" ] }, "tag_strategy": { From 4d6cd8b373ad2753e5f2f63dcd9c3b7006c6b1f7 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 01:11:47 -0800 Subject: [PATCH 54/60] Fix workflow bot token drill repository-name resolution --- scripts/Test-WorkflowBotTokenHealth.ps1 | 50 ++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/scripts/Test-WorkflowBotTokenHealth.ps1 b/scripts/Test-WorkflowBotTokenHealth.ps1 index 4771396..d3806b2 100644 --- a/scripts/Test-WorkflowBotTokenHealth.ps1 +++ b/scripts/Test-WorkflowBotTokenHealth.ps1 @@ -126,7 +126,7 @@ try { }) | Out-Null } else { $owner = [string]$repoParts[0] - $name = [string]$repoParts[1] + $repoName = [string]$repoParts[1] $branchProtectionQuery = @' query($owner:String!, $name:String!) { repository(owner:$owner, name:$name) { @@ -140,30 +140,30 @@ query($owner:String!, $name:String!) 
{ '@ $checkResults = @( - Invoke-TokenCheck -Name 'viewer_query' -Action { - Invoke-GhJson -Arguments @( - 'api', 'graphql', - '-f', 'query=query { viewer { login } }' - ) - }, - Invoke-TokenCheck -Name 'repo_read' -Action { - Invoke-GhJson -Arguments @( - 'api', "repos/$Repository" - ) - }, - Invoke-TokenCheck -Name 'actions_runners_read' -Action { - Invoke-GhJson -Arguments @( - 'api', "repos/$Repository/actions/runners?per_page=1" - ) - }, - Invoke-TokenCheck -Name 'branch_protection_graphql_read' -Action { - Invoke-GhJson -Arguments @( - 'api', 'graphql', - '-f', ("query={0}" -f $branchProtectionQuery), - '-F', ("owner={0}" -f $owner), - '-F', ("name={0}" -f $name) - ) - } + (Invoke-TokenCheck -Name 'viewer_query' -Action { + Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', 'query=query { viewer { login } }' + ) + }) + (Invoke-TokenCheck -Name 'repo_read' -Action { + Invoke-GhJson -Arguments @( + 'api', "repos/$Repository" + ) + }) + (Invoke-TokenCheck -Name 'actions_runners_read' -Action { + Invoke-GhJson -Arguments @( + 'api', "repos/$Repository/actions/runners?per_page=1" + ) + }) + (Invoke-TokenCheck -Name 'branch_protection_graphql_read' -Action { + Invoke-GhJson -Arguments @( + 'api', 'graphql', + '-f', ("query={0}" -f $branchProtectionQuery), + '-F', ("owner={0}" -f $owner), + '-F', ("name={0}" -f $repoName) + ) + }) ) foreach ($entry in @($checkResults)) { From c3cf8a04e18cd91fa56b49288a5eafec5f5c634f Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 01:30:22 -0800 Subject: [PATCH 55/60] Harden control-plane watch timeout and race-drill traceability --- .github/workflows/release-control-plane.yml | 19 +++++++++ AGENTS.md | 5 +++ README.md | 5 +++ .../runbooks/release-ops-incident-response.md | 2 + scripts/Invoke-ReleaseControlPlane.ps1 | 40 ++++++++++++++++--- scripts/Invoke-ReleaseRaceHardeningDrill.ps1 | 32 +++++++++++---- ...easeControlPlaneWorkflowContract.Tests.ps1 | 5 +++ ...ceHardeningDrillWorkflowContract.Tests.ps1 | 5 
+++ 8 files changed, 100 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 6a03143..7a94c61 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -31,6 +31,11 @@ on: required: false default: '1' type: string + watch_timeout_minutes: + description: Timeout minutes for watching dispatched release workflows. + required: false + default: '120' + type: string force_stable_promotion_outside_window: description: Force stable promotion outside policy window for FullCycle (audited). required: false @@ -107,6 +112,19 @@ jobs: $keepLatestCanaryN = $parsedKeepLatestCanaryN } + $watchTimeoutText = [string]'${{ inputs.watch_timeout_minutes }}' + $watchTimeoutMinutes = 120 + if (-not [string]::IsNullOrWhiteSpace($watchTimeoutText)) { + $parsedWatchTimeoutMinutes = 0 + if (-not [int]::TryParse($watchTimeoutText, [ref]$parsedWatchTimeoutMinutes)) { + throw "watch_timeout_minutes must be an integer. actual='$watchTimeoutText'" + } + if ($parsedWatchTimeoutMinutes -lt 5 -or $parsedWatchTimeoutMinutes -gt 240) { + throw "watch_timeout_minutes must be between 5 and 240. 
actual='$watchTimeoutText'" + } + $watchTimeoutMinutes = $parsedWatchTimeoutMinutes + } + $autoRemediateText = [string]'${{ inputs.auto_remediate }}' $autoRemediate = $true if (-not [string]::IsNullOrWhiteSpace($autoRemediateText)) { @@ -136,6 +154,7 @@ jobs: -Mode $mode ` -SyncGuardMaxAgeHours $syncGuardAgeHours ` -KeepLatestCanaryN $keepLatestCanaryN ` + -WatchTimeoutMinutes $watchTimeoutMinutes ` -AutoRemediate:$autoRemediate ` -ForceStablePromotionOutsideWindow:$forceStablePromotionOutsideWindow ` -ForceStablePromotionReason $forceStablePromotionReason ` diff --git a/AGENTS.md b/AGENTS.md index 68e316d..2542627 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -254,6 +254,8 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `promotion_source_not_at_head` - `promotion_lineage_invalid` - `stable_window_override_invalid` + - `release_dispatch_report_invalid` + - `release_dispatch_watch_timeout` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` @@ -343,6 +345,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - Guardrails report must include `remediation_hints` when status is fail and auto-remediation cannot fully recover. 
- Race-hardening drill reason codes must remain explicit: - `drill_passed` + - `contender_dispatch_report_invalid` + - `control_plane_dispatch_report_invalid` + - `control_plane_watch_timeout` - `control_plane_collision_not_observed` - `control_plane_report_download_failed` - `control_plane_report_missing` diff --git a/README.md b/README.md index 1db7701..fcda485 100644 --- a/README.md +++ b/README.md @@ -419,6 +419,8 @@ Top-level release-control-plane deterministic failure reason codes include: - `promotion_source_not_at_head` - `promotion_lineage_invalid` - `stable_window_override_invalid` +- `release_dispatch_report_invalid` +- `release_dispatch_watch_timeout` - `release_dispatch_watch_failed` - `release_verification_failed` - `canary_hygiene_failed` @@ -490,6 +492,9 @@ It runs `scripts/Invoke-ReleaseRaceHardeningDrill.ps1` to prove release-tag coll - requires release verification evidence from control-plane report (`release_verification.status=pass`) - deterministic failure reason codes include: - `control_plane_collision_not_observed` + - `contender_dispatch_report_invalid` + - `control_plane_dispatch_report_invalid` + - `control_plane_watch_timeout` - `control_plane_report_download_failed` - `control_plane_report_missing` - `control_plane_run_failed` diff --git a/docs/runbooks/release-ops-incident-response.md b/docs/runbooks/release-ops-incident-response.md index c155281..c5840fe 100644 --- a/docs/runbooks/release-ops-incident-response.md +++ b/docs/runbooks/release-ops-incident-response.md @@ -29,6 +29,8 @@ Reason code mapping: - `sync_guard_incomplete`: only in-progress/queued runs exist; no completed run yet. - `promotion_lineage_invalid`: promotion source/target channel, SemVer core, or commit-SHA lineage check failed. - `stable_window_override_invalid`: requested stable override violated stable window policy (override disabled, missing reason, reason too short, or reason format mismatch). 
+- `release_dispatch_report_invalid`: release dispatch metadata was incomplete (for example, missing dispatched `run_id`). +- `release_dispatch_watch_timeout`: dispatched release run did not complete before the configured watch timeout. - `release_dispatch_watch_failed`: release workflow dispatch completed but run conclusion was not `success`. - `release_verification_failed`: post-dispatch release verification failed (missing assets or invalid `release-manifest.json` metadata). - `canary_hygiene_failed`: SemVer canary retention cleanup failed after publish. diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index 622c60b..a82942b 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -340,6 +340,8 @@ function Resolve-ControlPlaneFailureReasonCode { if ($message -match '^semver_prerelease_sequence_exhausted') { return 'semver_prerelease_sequence_exhausted' } if ($message -match '^release_tag_collision_retry_exhausted') { return 'release_tag_collision_retry_exhausted' } if ($message -match '^release_dispatch_attempts_exhausted') { return 'release_dispatch_attempts_exhausted' } + if ($message -match '^release_dispatch_report_invalid') { return 'release_dispatch_report_invalid' } + if ($message -match '^release_watch_timeout') { return 'release_dispatch_watch_timeout' } if ($message -match '^release_watch_failed|^release_watch_not_success') { return 'release_dispatch_watch_failed' } if ($message -match '^release_verification_') { return 'release_verification_failed' } if ($message -match '^canary_hygiene_failed') { return 'canary_hygiene_failed' } @@ -1427,28 +1429,54 @@ function Invoke-ReleaseMode { -Inputs $dispatchInputs ` -OutputPath $dispatchReportPath | Out-Null $dispatchReport = Get-Content -LiteralPath $dispatchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $dispatchRunId = [string]$dispatchReport.run_id + if ([string]::IsNullOrWhiteSpace($dispatchRunId)) { + throw 
"release_dispatch_report_invalid: mode=$ModeName attempt=$dispatchAttempt field=run_id" + } $watchReportPath = Join-Path $ScratchRoot "$ModeName-watch-$dispatchAttempt.json" & pwsh -NoProfile -File $watchWorkflowScript ` -Repository $Repository ` - -RunId ([string]$dispatchReport.run_id) ` + -RunId $dispatchRunId ` -TimeoutMinutes $WatchTimeoutMinutes ` -OutputPath $watchReportPath | Out-Null - if ($LASTEXITCODE -ne 0) { - throw "release_watch_failed: mode=$ModeName run_id=$([string]$dispatchReport.run_id) exit_code=$LASTEXITCODE" + $watchExitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + if ($watchExitCode -ne 0) { + $watchFailureStatus = '' + $watchFailureConclusion = '' + $watchFailureClassifiedReason = '' + if (Test-Path -LiteralPath $watchReportPath -PathType Leaf) { + try { + $watchFailureReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + $watchFailureStatus = [string]$watchFailureReport.status + $watchFailureConclusion = [string]$watchFailureReport.conclusion + $watchFailureClassifiedReason = [string]$watchFailureReport.classified_reason + } catch { + $watchFailureClassifiedReason = 'watch_report_parse_failed' + } + } else { + $watchFailureClassifiedReason = 'watch_report_missing' + } + + if ([string]::Equals($watchFailureClassifiedReason, 'timeout', [System.StringComparison]::OrdinalIgnoreCase)) { + throw "release_watch_timeout: mode=$ModeName run_id=$dispatchRunId timeout_minutes=$WatchTimeoutMinutes status=$watchFailureStatus" + } + + throw "release_watch_failed: mode=$ModeName run_id=$dispatchRunId exit_code=$watchExitCode classified_reason=$watchFailureClassifiedReason conclusion=$watchFailureConclusion status=$watchFailureStatus" } $watchReport = Get-Content -LiteralPath $watchReportPath -Raw | ConvertFrom-Json -ErrorAction Stop $watchConclusion = [string]$watchReport.conclusion + $watchClassifiedReason = [string]$watchReport.classified_reason if ($watchConclusion -ne 'success') { - 
throw "release_watch_not_success: mode=$ModeName run_id=$([string]$dispatchReport.run_id) conclusion=$watchConclusion" + throw "release_watch_not_success: mode=$ModeName run_id=$dispatchRunId conclusion=$watchConclusion classified_reason=$watchClassifiedReason" } $dispatchRecord = [ordered]@{ status = 'success' workflow = $ReleaseWorkflowFile branch = $Branch - run_id = [string]$dispatchReport.run_id + run_id = $dispatchRunId url = [string]$watchReport.url conclusion = [string]$watchReport.conclusion attempts = $dispatchAttempt @@ -1458,7 +1486,7 @@ function Invoke-ReleaseMode { attempt = $dispatchAttempt tag = $targetTag status = 'success' - run_id = [string]$dispatchReport.run_id + run_id = $dispatchRunId run_url = [string]$watchReport.url }) break diff --git a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 index b822b64..813d5dd 100644 --- a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 +++ b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 @@ -85,8 +85,11 @@ function Resolve-RaceDrillFailureReasonCode { $message = [string]$MessageText if ($message -match '^required_script_missing') { return 'required_script_missing' } + if ($message -match '^contender_dispatch_report_invalid') { return 'contender_dispatch_report_invalid' } + if ($message -match '^control_plane_dispatch_report_invalid') { return 'control_plane_dispatch_report_invalid' } if ($message -match '^contender_release_dispatch_failed') { return 'contender_release_dispatch_failed' } if ($message -match '^control_plane_dispatch_failed') { return 'control_plane_dispatch_failed' } + if ($message -match '^control_plane_watch_timeout') { return 'control_plane_watch_timeout' } if ($message -match '^control_plane_run_failed') { return 'control_plane_run_failed' } if ($message -match '^control_plane_report_download_failed') { return 'control_plane_report_download_failed' } if ($message -match '^control_plane_report_missing') { return 'control_plane_report_missing' } @@ 
-367,6 +370,8 @@ $report = [ordered]@{ collision_retries = 0 predicted_target_tag = '' final_target_tag = '' + contender_run_id = '' + control_plane_run_id = '' dispatch_status = '' dispatch_reason_code = '' attempt_history_statuses = @() @@ -402,8 +407,12 @@ try { throw "contender_release_dispatch_failed: workflow=$ReleaseWorkflowFile exit_code=$LASTEXITCODE" } $contenderDispatch = Get-Content -LiteralPath $contenderDispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $contenderRunId = [string]$contenderDispatch.run_id + if ([string]::IsNullOrWhiteSpace($contenderRunId)) { + throw "contender_dispatch_report_invalid: workflow=$ReleaseWorkflowFile field=run_id" + } $report.dispatches.contender_release = [ordered]@{ - run_id = [string]$contenderDispatch.run_id + run_id = $contenderRunId head_sha = [string]$contenderDispatch.head_sha status = [string]$contenderDispatch.status url = [string]$contenderDispatch.url @@ -428,8 +437,12 @@ try { throw "control_plane_dispatch_failed: workflow=$ControlPlaneWorkflowFile exit_code=$LASTEXITCODE" } $controlPlaneDispatch = Get-Content -LiteralPath $controlPlaneDispatchPath -Raw | ConvertFrom-Json -ErrorAction Stop + $controlPlaneRunId = [string]$controlPlaneDispatch.run_id + if ([string]::IsNullOrWhiteSpace($controlPlaneRunId)) { + throw "control_plane_dispatch_report_invalid: workflow=$ControlPlaneWorkflowFile field=run_id" + } $report.dispatches.control_plane = [ordered]@{ - run_id = [string]$controlPlaneDispatch.run_id + run_id = $controlPlaneRunId head_sha = [string]$controlPlaneDispatch.head_sha status = [string]$controlPlaneDispatch.status url = [string]$controlPlaneDispatch.url @@ -449,7 +462,7 @@ try { $contenderWatchPath = Join-Path $scratchRoot 'contender-release-watch.json' $contenderWatch = Invoke-WorkflowWatchCapture ` -TargetRepository $Repository ` - -RunId ([string]$contenderDispatch.run_id) ` + -RunId $contenderRunId ` -TimeoutMinutes $WatchTimeoutMinutes ` -ReportPath $contenderWatchPath 
$report.watches.contender_release = [ordered]@{ @@ -463,13 +476,13 @@ try { runtime_error = [string]$contenderWatch.runtime_error } if (-not [bool]$contenderWatch.successful) { - Add-UniqueMessage -Target $warnings -Message "contender_watch_non_success: run_id=$([string]$contenderWatch.report.run_id) conclusion=$([string]$contenderWatch.report.conclusion)" + Add-UniqueMessage -Target $warnings -Message "contender_watch_non_success: run_id=$([string]$contenderWatch.report.run_id) conclusion=$([string]$contenderWatch.report.conclusion) classified_reason=$([string]$contenderWatch.report.classified_reason)" } $controlPlaneWatchPath = Join-Path $scratchRoot 'control-plane-watch.json' $controlPlaneWatch = Invoke-WorkflowWatchCapture ` -TargetRepository $Repository ` - -RunId ([string]$controlPlaneDispatch.run_id) ` + -RunId $controlPlaneRunId ` -TimeoutMinutes $WatchTimeoutMinutes ` -ReportPath $controlPlaneWatchPath $report.watches.control_plane = [ordered]@{ @@ -483,11 +496,14 @@ try { runtime_error = [string]$controlPlaneWatch.runtime_error } if (-not [bool]$controlPlaneWatch.successful) { + $controlPlaneClassifiedReason = [string]$controlPlaneWatch.report.classified_reason + if ([string]::Equals($controlPlaneClassifiedReason, 'timeout', [System.StringComparison]::OrdinalIgnoreCase)) { + throw "control_plane_watch_timeout: run_id=$controlPlaneRunId timeout_minutes=$WatchTimeoutMinutes" + } $controlPlaneConclusion = [string]$controlPlaneWatch.report.conclusion - throw "control_plane_run_failed: run_id=$([string]$controlPlaneDispatch.run_id) conclusion=$controlPlaneConclusion" + throw "control_plane_run_failed: run_id=$controlPlaneRunId conclusion=$controlPlaneConclusion classified_reason=$controlPlaneClassifiedReason" } - $controlPlaneRunId = [string]$controlPlaneDispatch.run_id $controlPlaneArtifactName = "release-control-plane-report-$controlPlaneRunId" $report.artifacts.control_plane_report_artifact = $controlPlaneArtifactName $artifactRoot = Join-Path $scratchRoot 
'control-plane-report-artifact' @@ -589,6 +605,8 @@ try { collision_retries = $collisionRetries predicted_target_tag = [string]$targetTagRecord.tag final_target_tag = $targetTag + contender_run_id = $contenderRunId + control_plane_run_id = $controlPlaneRunId dispatch_status = $dispatchStatus dispatch_reason_code = $dispatchReasonCode attempt_history_statuses = @($attemptHistoryStatuses) diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index b542343..6422d53 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -27,6 +27,7 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'FullCycle' $script:workflowContent | Should -Match 'auto_remediate' $script:workflowContent | Should -Match 'keep_latest_canary_n' + $script:workflowContent | Should -Match 'watch_timeout_minutes' $script:workflowContent | Should -Match 'force_stable_promotion_outside_window' $script:workflowContent | Should -Match 'force_stable_promotion_reason' $script:workflowContent | Should -Match 'dry_run' @@ -73,6 +74,9 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'semver_prerelease_sequence_exhausted' $script:runtimeContent | Should -Match 'release_tag_collision_retry_exhausted' $script:runtimeContent | Should -Match 'release_dispatch_attempts_exhausted' + $script:runtimeContent | Should -Match 'release_dispatch_report_invalid' + $script:runtimeContent | Should -Match 'release_watch_timeout' + $script:runtimeContent | Should -Match 'release_dispatch_watch_timeout' $script:runtimeContent | Should -Match '\[release_tag_collision\]' $script:runtimeContent | Should -Match 'release_watch_not_success' $script:runtimeContent | Should -Match 'release_verification_asset_missing' @@ -93,6 +97,7 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should 
-Match 'Invoke-CanarySmokeTagHygiene\.ps1' $script:runtimeContent | Should -Match '\$dispatchInputs = @\(' $script:runtimeContent | Should -Match '-Inputs \$dispatchInputs' + $script:workflowContent | Should -Match '-WatchTimeoutMinutes \$watchTimeoutMinutes' } It 'decouples control-plane runner health gate to release-runner labels' { diff --git a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 index 2b03a3b..5c403f2 100644 --- a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 @@ -49,6 +49,11 @@ Describe 'Release race-hardening drill workflow contract' { $script:runtimeContent | Should -Match 'gh run download' $script:runtimeContent | Should -Match 'control_plane_collision_not_observed' $script:runtimeContent | Should -Match 'collision_retries' + $script:runtimeContent | Should -Match 'contender_dispatch_report_invalid' + $script:runtimeContent | Should -Match 'control_plane_dispatch_report_invalid' + $script:runtimeContent | Should -Match 'control_plane_watch_timeout' + $script:runtimeContent | Should -Match 'contender_run_id' + $script:runtimeContent | Should -Match 'control_plane_run_id' $script:runtimeContent | Should -Match 'tag_already_published_by_peer' $script:runtimeContent | Should -Match 'reproducibility-report\.json' $script:runtimeContent | Should -Match 'drill_passed' From e577815ad4673de1977606172a614afb00771e6b Mon Sep 17 00:00:00 2001 From: Sergio Velderrain Date: Fri, 27 Feb 2026 02:57:20 -0800 Subject: [PATCH 56/60] feat(control-plane): add GA policy state machine, rollback orchestration, and error-budget guardrails (#56) Co-authored-by: svelderrainruiz --- .github/workflows/ops-slo-gate.yml | 48 ++ AGENTS.md | 16 + README.md | 16 +- scripts/Invoke-OpsSloSelfHealing.ps1 | 33 +- scripts/Invoke-ReleaseControlPlane.ps1 | 476 +++++++++++++++++- scripts/Test-OpsSloGate.ps1 | 69 +++ 
scripts/Test-PolicyContracts.ps1 | 12 + scripts/Test-ReleaseClientContracts.ps1 | 12 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 54 ++ .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 6 + tests/OpsSloGateWorkflowContract.Tests.ps1 | 9 + tests/ReleaseClientPolicyContract.Tests.ps1 | 18 + ...easeControlPlaneWorkflowContract.Tests.ps1 | 9 + .../workspace-governance.json | 49 ++ workspace-governance.json | 49 ++ 15 files changed, 873 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ops-slo-gate.yml b/.github/workflows/ops-slo-gate.yml index 6068cff..312688e 100644 --- a/.github/workflows/ops-slo-gate.yml +++ b/.github/workflows/ops-slo-gate.yml @@ -20,6 +20,21 @@ on: required: false default: '12' type: string + error_budget_window_days: + description: Error-budget window in days. + required: false + default: '7' + type: string + error_budget_max_failed_runs: + description: Maximum failed runs allowed in the error-budget window. + required: false + default: '0' + type: string + error_budget_max_failure_rate_pct: + description: Maximum failure-rate percentage allowed in the error-budget window. + required: false + default: '0' + type: string auto_self_heal: description: Enable bounded self-healing when SLO gate fails. required: false @@ -97,6 +112,36 @@ jobs: $syncGuardMaxAgeHours = $parsedMaxAge } + $errorBudgetWindowDaysText = [string]'${{ inputs.error_budget_window_days }}' + $errorBudgetWindowDays = 7 + if (-not [string]::IsNullOrWhiteSpace($errorBudgetWindowDaysText)) { + $parsedErrorBudgetWindowDays = 0 + if (-not [int]::TryParse($errorBudgetWindowDaysText, [ref]$parsedErrorBudgetWindowDays)) { + throw "error_budget_window_days must be an integer. 
actual='$errorBudgetWindowDaysText'" + } + $errorBudgetWindowDays = $parsedErrorBudgetWindowDays + } + + $errorBudgetMaxFailedRunsText = [string]'${{ inputs.error_budget_max_failed_runs }}' + $errorBudgetMaxFailedRuns = 0 + if (-not [string]::IsNullOrWhiteSpace($errorBudgetMaxFailedRunsText)) { + $parsedErrorBudgetMaxFailedRuns = 0 + if (-not [int]::TryParse($errorBudgetMaxFailedRunsText, [ref]$parsedErrorBudgetMaxFailedRuns)) { + throw "error_budget_max_failed_runs must be an integer. actual='$errorBudgetMaxFailedRunsText'" + } + $errorBudgetMaxFailedRuns = $parsedErrorBudgetMaxFailedRuns + } + + $errorBudgetMaxFailureRatePctText = [string]'${{ inputs.error_budget_max_failure_rate_pct }}' + $errorBudgetMaxFailureRatePct = 0.0 + if (-not [string]::IsNullOrWhiteSpace($errorBudgetMaxFailureRatePctText)) { + $parsedErrorBudgetMaxFailureRatePct = 0.0 + if (-not [double]::TryParse($errorBudgetMaxFailureRatePctText, [ref]$parsedErrorBudgetMaxFailureRatePct)) { + throw "error_budget_max_failure_rate_pct must be a number. 
actual='$errorBudgetMaxFailureRatePctText'" + } + $errorBudgetMaxFailureRatePct = $parsedErrorBudgetMaxFailureRatePct + } + $autoSelfHealText = [string]'${{ inputs.auto_self_heal }}' $autoSelfHeal = $true if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { @@ -148,6 +193,9 @@ jobs: -LookbackDays $lookbackDays ` -MinSuccessRatePct $minSuccessRatePct ` -SyncGuardMaxAgeHours $syncGuardMaxAgeHours ` + -ErrorBudgetWindowDays $errorBudgetWindowDays ` + -ErrorBudgetMaxFailedRuns $errorBudgetMaxFailedRuns ` + -ErrorBudgetMaxFailureRatePct $errorBudgetMaxFailureRatePct ` -AutoRemediate:$autoSelfHeal ` -MaxAttempts $selfHealMaxAttempts ` -WatchTimeoutMinutes $selfHealWatchTimeout ` diff --git a/AGENTS.md b/AGENTS.md index 2542627..0165a7b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -213,6 +213,9 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `no_automatable_action` - `remediation_failed` - `.github/workflows/release-control-plane.yml` is the autonomous release orchestrator and must run `scripts/Invoke-ReleaseControlPlane.ps1`. +- `ops_control_plane_policy.schema_version` is required and currently pinned to `2.0`. +- `ops_control_plane_policy.state_machine` is required and must emit runtime transition evidence in `release-control-plane-report.json`. +- `ops_control_plane_policy.rollback_orchestration` is required and controls deterministic rollback self-healing trigger behavior. 
- Control-plane mode contract: - `Validate` - `CanaryCycle` @@ -268,6 +271,11 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `critical_min_success_rate_pct` - `warning_reason_codes` - `critical_reason_codes` +- SLO gate error budget must remain explicit under `ops_control_plane_policy.error_budget`: + - `window_days` + - `max_failed_runs` + - `max_failure_rate_pct` + - `critical_burn_rate_pct` - SLO self-healing reason codes must remain explicit: - `already_healthy` - `remediated` @@ -281,6 +289,8 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `workflow_success_rate_below_threshold` - `sync_guard_missing` - `sync_guard_stale` + - `error_budget_exhausted` + - `error_budget_failure_rate_exceeded` - `slo_gate_runtime_error` - `.github/workflows/ops-policy-drift-check.yml` must run `scripts/Test-ReleaseControlPlanePolicyDrift.ps1`. - Policy drift reason codes must remain explicit: @@ -290,6 +300,12 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_schema_version_invalid` + - `ops_control_plane_state_machine_missing` + - `ops_control_plane_state_machine_version_missing` + - `ops_control_plane_rollback_orchestration_missing` + - `ops_control_plane_error_budget_missing` + - `ops_control_plane_error_budget_window_days_invalid` - `ops_control_plane_slo_alert_thresholds_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_guardrails_missing` diff --git a/README.md b/README.md index fcda485..2d5162d 100644 --- a/README.md +++ b/README.md @@ -409,6 +409,8 @@ Control-plane behavior: 10. Emits explicit override audit artifact `release-control-plane-override-audit.json` for every run. 11. Auto-opens incident title `Release Control Plane Stable Override Alert` whenever decision code is `stable_window_override_applied`. 12. 
Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. +13. Loads GA policy contract `installer_contract.release_client.ops_control_plane_policy.schema_version=2.0` and emits state-machine execution evidence (`state_machine.transitions_executed`) in every report. +14. Executes deterministic rollback orchestration (`Invoke-RollbackDrillSelfHealing.ps1`) when configured trigger reason codes are hit. Top-level release-control-plane deterministic failure reason codes include: - `ops_health_gate_failed` @@ -433,11 +435,15 @@ Top-level release-control-plane deterministic failure reason codes include: - 7-day lookback by default - 100% success-rate target for `ops-monitoring`, `ops-autoremediate`, and `release-control-plane` - max sync-guard success age of 12 hours +- hard error-budget defaults: + - 7-day budget window + - max failed runs: `0` + - max failure-rate percent: `0` - alert thresholds for severity classification: - warning minimum workflow success rate: `99.5` - critical minimum workflow success rate: `99` - warning reason codes: `workflow_missing_runs`, `workflow_success_rate_below_threshold` - - critical reason codes: `workflow_failure_detected`, `sync_guard_missing`, `sync_guard_stale`, `slo_gate_runtime_error` + - critical reason codes: `workflow_failure_detected`, `sync_guard_missing`, `sync_guard_stale`, `slo_gate_runtime_error`, `error_budget_exhausted`, `error_budget_failure_rate_exceeded` - bounded self-healing by dispatching `ops-autoremediate.yml` and re-verifying SLO status - deterministic reason codes on failure: - `auto_remediation_disabled` @@ -450,6 +456,8 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `workflow_success_rate_below_threshold` - `sync_guard_missing` - `sync_guard_stale` +- `error_budget_exhausted` +- `error_budget_failure_rate_exceeded` `ops-policy-drift-check.yml` is 
scheduled hourly and supports manual dispatch. It runs `scripts/Test-ReleaseControlPlanePolicyDrift.ps1` and fails on: - root/payload release-client policy drift @@ -459,6 +467,12 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `release_client_drift` - `runtime_images_missing` - `ops_control_plane_policy_missing` + - `ops_control_plane_schema_version_invalid` + - `ops_control_plane_state_machine_missing` + - `ops_control_plane_state_machine_version_missing` + - `ops_control_plane_rollback_orchestration_missing` + - `ops_control_plane_error_budget_missing` + - `ops_control_plane_error_budget_window_days_invalid` - `ops_control_plane_slo_alert_thresholds_missing` - `ops_control_plane_self_healing_missing` - `ops_control_plane_guardrails_missing` diff --git a/scripts/Invoke-OpsSloSelfHealing.ps1 b/scripts/Invoke-OpsSloSelfHealing.ps1 index c91417b..1e9ed44 100644 --- a/scripts/Invoke-OpsSloSelfHealing.ps1 +++ b/scripts/Invoke-OpsSloSelfHealing.ps1 @@ -17,6 +17,18 @@ param( [ValidateRange(1, 168)] [int]$SyncGuardMaxAgeHours = 12, + [Parameter()] + [ValidateRange(1, 90)] + [int]$ErrorBudgetWindowDays = 7, + + [Parameter()] + [ValidateRange(0, 10000)] + [int]$ErrorBudgetMaxFailedRuns = 0, + + [Parameter()] + [ValidateRange(0, 100)] + [double]$ErrorBudgetMaxFailureRatePct = 0, + [Parameter()] [bool]$AutoRemediate = $true, @@ -57,7 +69,9 @@ param( 'workflow_failure_detected', 'sync_guard_missing', 'sync_guard_stale', - 'slo_gate_runtime_error' + 'slo_gate_runtime_error', + 'error_budget_exhausted', + 'error_budget_failure_rate_exceeded' ), [Parameter()] @@ -196,6 +210,9 @@ function Invoke-SloGateAssessment { [Parameter(Mandatory = $true)][int]$WindowDays, [Parameter(Mandatory = $true)][double]$SuccessThreshold, [Parameter(Mandatory = $true)][int]$SyncGuardHours, + [Parameter(Mandatory = $true)][int]$BudgetWindowDays, + [Parameter(Mandatory = $true)][int]$BudgetMaxFailedRuns, + [Parameter(Mandatory = 
$true)][double]$BudgetMaxFailureRatePct, [Parameter(Mandatory = $true)][string]$ReportPath ) @@ -207,6 +224,9 @@ function Invoke-SloGateAssessment { -LookbackDays $WindowDays ` -MinSuccessRatePct $SuccessThreshold ` -SyncGuardMaxAgeHours $SyncGuardHours ` + -ErrorBudgetWindowDays $BudgetWindowDays ` + -ErrorBudgetMaxFailedRuns $BudgetMaxFailedRuns ` + -ErrorBudgetMaxFailureRatePct $BudgetMaxFailureRatePct ` -OutputPath $ReportPath | Out-Null $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } } catch { @@ -252,6 +272,11 @@ $report = [ordered]@{ lookback_days = $LookbackDays min_success_rate_pct = $MinSuccessRatePct sync_guard_max_age_hours = $SyncGuardMaxAgeHours + error_budget = [ordered]@{ + lookback_days = $ErrorBudgetWindowDays + max_failed_runs = $ErrorBudgetMaxFailedRuns + max_failure_rate_pct = $ErrorBudgetMaxFailureRatePct + } auto_remediate = [bool]$AutoRemediate remediation_workflow = $RemediationWorkflow remediation_branch = $RemediationBranch @@ -280,6 +305,9 @@ try { -WindowDays $LookbackDays ` -SuccessThreshold $MinSuccessRatePct ` -SyncGuardHours $SyncGuardMaxAgeHours ` + -BudgetWindowDays $ErrorBudgetWindowDays ` + -BudgetMaxFailedRuns $ErrorBudgetMaxFailedRuns ` + -BudgetMaxFailureRatePct $ErrorBudgetMaxFailureRatePct ` -ReportPath $initialPath $initialReport = $initialAssessment.report $report.initial_report = $initialReport @@ -358,6 +386,9 @@ try { -WindowDays $LookbackDays ` -SuccessThreshold $MinSuccessRatePct ` -SyncGuardHours $SyncGuardMaxAgeHours ` + -BudgetWindowDays $ErrorBudgetWindowDays ` + -BudgetMaxFailedRuns $ErrorBudgetMaxFailedRuns ` + -BudgetMaxFailureRatePct $ErrorBudgetMaxFailureRatePct ` -ReportPath $verifyPath $verifyReport = $verifyAssessment.report diff --git a/scripts/Invoke-ReleaseControlPlane.ps1 b/scripts/Invoke-ReleaseControlPlane.ps1 index a82942b..e11e080 100644 --- a/scripts/Invoke-ReleaseControlPlane.ps1 +++ b/scripts/Invoke-ReleaseControlPlane.ps1 @@ -58,10 +58,11 @@ $opsRemediateScript = 
Join-Path $PSScriptRoot 'Invoke-OpsAutoRemediation.ps1' $dispatchWorkflowScript = Join-Path $PSScriptRoot 'Dispatch-WorkflowAtRemoteHead.ps1' $watchWorkflowScript = Join-Path $PSScriptRoot 'Watch-WorkflowRun.ps1' $canaryHygieneScript = Join-Path $PSScriptRoot 'Invoke-CanarySmokeTagHygiene.ps1' +$rollbackSelfHealingScript = Join-Path $PSScriptRoot 'Invoke-RollbackDrillSelfHealing.ps1' $releaseRunnerLabels = @('self-hosted', 'windows', 'self-hosted-windows-lv') $releaseRunnerLabelsCsv = [string]::Join(',', $releaseRunnerLabels) -foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatchWorkflowScript, $watchWorkflowScript, $canaryHygieneScript)) { +foreach ($requiredScript in @($opsSnapshotScript, $opsRemediateScript, $dispatchWorkflowScript, $watchWorkflowScript, $canaryHygieneScript, $rollbackSelfHealingScript)) { if (-not (Test-Path -LiteralPath $requiredScript -PathType Leaf)) { throw "required_script_missing: $requiredScript" } @@ -283,8 +284,339 @@ function Resolve-StablePromotionWindowPolicy { return $policy } +function Resolve-ControlPlaneGaPolicy { + param( + [Parameter(Mandatory = $true)][string]$ManifestPath + ) + + $warnings = [System.Collections.Generic.List[string]]::new() + $policy = [ordered]@{ + schema_version = '2.0' + source = 'default' + warnings = @() + state_machine = [ordered]@{ + version = '1.0' + initial_state = 'ops_health_preflight' + terminal_states = @('completed', 'failed') + } + rollback_orchestration = [ordered]@{ + enabled = $true + run_on_dry_run = $false + trigger_reason_codes = @( + 'ops_health_gate_failed', + 'ops_unhealthy', + 'release_dispatch_watch_timeout', + 'release_dispatch_watch_failed', + 'release_dispatch_attempts_exhausted', + 'release_verification_failed' + ) + } + rollback_drill = [ordered]@{ + channel = 'canary' + required_history_count = 2 + release_limit = 100 + release_workflow = 'release-workspace-installer.yml' + release_branch = 'main' + watch_timeout_minutes = 120 + canary_sequence_min = 
1 + canary_sequence_max = 49 + max_attempts = 1 + } + } + + if (-not (Test-Path -LiteralPath $ManifestPath -PathType Leaf)) { + [void]$warnings.Add("workspace_governance_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + try { + $manifest = Get-Content -LiteralPath $ManifestPath -Raw | ConvertFrom-Json -Depth 100 + $candidatePolicy = $manifest.installer_contract.release_client.ops_control_plane_policy + if ($null -eq $candidatePolicy) { + [void]$warnings.Add("ops_control_plane_policy_missing: path=$ManifestPath") + $policy.warnings = @($warnings) + return $policy + } + + $policy.source = 'workspace_governance' + + $candidateSchema = [string]$candidatePolicy.schema_version + if (-not [string]::IsNullOrWhiteSpace($candidateSchema)) { + $policy.schema_version = $candidateSchema.Trim() + } else { + [void]$warnings.Add('ops_control_plane_policy_schema_version_missing') + } + + $candidateStateMachine = $candidatePolicy.state_machine + if ($null -eq $candidateStateMachine) { + [void]$warnings.Add('ops_control_plane_policy_state_machine_missing') + } else { + $candidateStateMachineVersion = [string]$candidateStateMachine.version + if (-not [string]::IsNullOrWhiteSpace($candidateStateMachineVersion)) { + $policy.state_machine.version = $candidateStateMachineVersion.Trim() + } else { + [void]$warnings.Add('ops_control_plane_policy_state_machine_version_missing') + } + + $candidateInitialState = [string]$candidateStateMachine.initial_state + if (-not [string]::IsNullOrWhiteSpace($candidateInitialState)) { + $policy.state_machine.initial_state = $candidateInitialState.Trim() + } else { + [void]$warnings.Add('ops_control_plane_policy_state_machine_initial_state_missing') + } + + $candidateTerminalStates = @($candidateStateMachine.terminal_states | Where-Object { -not [string]::IsNullOrWhiteSpace([string]$_) }) + if (@($candidateTerminalStates).Count -gt 0) { + $policy.state_machine.terminal_states = @( + $candidateTerminalStates | + ForEach-Object { ([string]$_).Trim() } | + Where-Object { -not
[string]::IsNullOrWhiteSpace($_) } | + Select-Object -Unique + ) + } else { + [void]$warnings.Add('ops_control_plane_policy_state_machine_terminal_states_missing') + } + } + + $candidateRollbackOrchestration = $candidatePolicy.rollback_orchestration + if ($null -eq $candidateRollbackOrchestration) { + [void]$warnings.Add('ops_control_plane_policy_rollback_orchestration_missing') + } else { + if ($candidateRollbackOrchestration.enabled -is [bool]) { + $policy.rollback_orchestration.enabled = [bool]$candidateRollbackOrchestration.enabled + } + if ($candidateRollbackOrchestration.run_on_dry_run -is [bool]) { + $policy.rollback_orchestration.run_on_dry_run = [bool]$candidateRollbackOrchestration.run_on_dry_run + } + + $candidateTriggerReasonCodes = @($candidateRollbackOrchestration.trigger_reason_codes | Where-Object { -not [string]::IsNullOrWhiteSpace([string]$_) }) + if (@($candidateTriggerReasonCodes).Count -gt 0) { + $policy.rollback_orchestration.trigger_reason_codes = @( + $candidateTriggerReasonCodes | + ForEach-Object { ([string]$_).Trim() } | + Where-Object { -not [string]::IsNullOrWhiteSpace($_) } | + Select-Object -Unique + ) + } else { + [void]$warnings.Add('ops_control_plane_policy_rollback_orchestration_trigger_reason_codes_missing') + } + } + + $candidateRollbackDrill = $candidatePolicy.rollback_drill + if ($null -ne $candidateRollbackDrill) { + $candidateRollbackChannel = [string]$candidateRollbackDrill.channel + if (-not [string]::IsNullOrWhiteSpace($candidateRollbackChannel)) { + $policy.rollback_drill.channel = $candidateRollbackChannel.Trim() + } + + $candidateRequiredHistoryCount = 0 + if ([int]::TryParse([string]$candidateRollbackDrill.required_history_count, [ref]$candidateRequiredHistoryCount) -and $candidateRequiredHistoryCount -ge 2 -and $candidateRequiredHistoryCount -le 100) { + $policy.rollback_drill.required_history_count = $candidateRequiredHistoryCount + } + + $candidateReleaseLimit = 0 + if ([int]::TryParse([string]$candidateRollbackDrill.release_limit, [ref]$candidateReleaseLimit) -and
$candidateReleaseLimit -ge 10 -and $candidateReleaseLimit -le 200) { + $policy.rollback_drill.release_limit = $candidateReleaseLimit + } + } + + $candidateSelfHealing = $candidatePolicy.self_healing + if ($null -ne $candidateSelfHealing) { + $candidateMaxAttempts = 0 + if ([int]::TryParse([string]$candidateSelfHealing.max_attempts, [ref]$candidateMaxAttempts) -and $candidateMaxAttempts -ge 1 -and $candidateMaxAttempts -le 5) { + $policy.rollback_drill.max_attempts = $candidateMaxAttempts + } + + $candidateSelfHealingRollback = $candidateSelfHealing.rollback_drill + if ($null -ne $candidateSelfHealingRollback) { + $candidateReleaseWorkflow = [string]$candidateSelfHealingRollback.release_workflow + if (-not [string]::IsNullOrWhiteSpace($candidateReleaseWorkflow)) { + $policy.rollback_drill.release_workflow = $candidateReleaseWorkflow.Trim() + } + + $candidateReleaseBranch = [string]$candidateSelfHealingRollback.release_branch + if (-not [string]::IsNullOrWhiteSpace($candidateReleaseBranch)) { + $policy.rollback_drill.release_branch = $candidateReleaseBranch.Trim() + } + + $candidateWatchTimeout = 0 + if ([int]::TryParse([string]$candidateSelfHealingRollback.watch_timeout_minutes, [ref]$candidateWatchTimeout) -and $candidateWatchTimeout -ge 5 -and $candidateWatchTimeout -le 240) { + $policy.rollback_drill.watch_timeout_minutes = $candidateWatchTimeout + } + + $candidateCanarySequenceMin = 0 + if ([int]::TryParse([string]$candidateSelfHealingRollback.canary_sequence_min, [ref]$candidateCanarySequenceMin) -and $candidateCanarySequenceMin -ge 1 -and $candidateCanarySequenceMin -le 49) { + $policy.rollback_drill.canary_sequence_min = $candidateCanarySequenceMin + } + + $candidateCanarySequenceMax = 0 + if ([int]::TryParse([string]$candidateSelfHealingRollback.canary_sequence_max, [ref]$candidateCanarySequenceMax) -and $candidateCanarySequenceMax -ge $policy.rollback_drill.canary_sequence_min -and $candidateCanarySequenceMax -le 99) { + 
$policy.rollback_drill.canary_sequence_max = $candidateCanarySequenceMax + } + } + } + } catch { + [void]$warnings.Add("ops_control_plane_policy_load_failed: $([string]$_.Exception.Message)") + } + + $policy.warnings = @($warnings) + return $policy +} + +function Add-ControlPlaneStateTransition { + param( + [Parameter(Mandatory = $true)]$StateMachine, + [Parameter(Mandatory = $true)][string]$FromState, + [Parameter(Mandatory = $true)][string]$Result, + [Parameter(Mandatory = $true)][string]$ToState, + [Parameter()][string]$ReasonCode = '', + [Parameter()][string]$Detail = '' + ) + + if ($null -eq $StateMachine) { + return + } + + $transitions = [System.Collections.Generic.List[object]]::new() + foreach ($existing in @($StateMachine.transitions_executed)) { + [void]$transitions.Add($existing) + } + + [void]$transitions.Add([ordered]@{ + timestamp_utc = Get-UtcNowIso + from_state = $FromState + result = $Result + to_state = $ToState + reason_code = $ReasonCode + detail = $Detail + }) + + $StateMachine.transitions_executed = @($transitions) + $StateMachine.current_state = $ToState +} + +function Should-AttemptRollbackOrchestration { + param( + [Parameter(Mandatory = $true)][string]$ReasonCode, + [Parameter(Mandatory = $true)]$Policy, + [Parameter(Mandatory = $true)][bool]$DryRunEnabled, + [Parameter(Mandatory = $true)][bool]$AutoRemediateEnabled + ) + + if ($null -eq $Policy) { + return [ordered]@{ + should_attempt = $false + decision_reason = 'rollback_policy_missing' + } + } + + if (-not [bool]$AutoRemediateEnabled) { + return [ordered]@{ + should_attempt = $false + decision_reason = 'auto_remediate_disabled' + } + } + + if (-not [bool]$Policy.enabled) { + return [ordered]@{ + should_attempt = $false + decision_reason = 'rollback_policy_disabled' + } + } + + if ([bool]$DryRunEnabled -and -not [bool]$Policy.run_on_dry_run) { + return [ordered]@{ + should_attempt = $false + decision_reason = 'rollback_dry_run_blocked' + } + } + + if (@($Policy.trigger_reason_codes) 
-notcontains [string]$ReasonCode) { + return [ordered]@{ + should_attempt = $false + decision_reason = 'rollback_reason_not_allowed' + } + } + + return [ordered]@{ + should_attempt = $true + decision_reason = 'rollback_triggered' + } +} + +function Invoke-ControlPlaneRollbackOrchestration { + param( + [Parameter(Mandatory = $true)][string]$TargetRepository, + [Parameter(Mandatory = $true)][string]$TargetBranch, + [Parameter(Mandatory = $true)]$RollbackPolicy, + [Parameter(Mandatory = $true)][string]$ScratchRoot + ) + + $rollbackReportPath = Join-Path $ScratchRoot 'rollback-orchestration-report.json' + $executionError = '' + $exitCode = 1 + + try { + & pwsh -NoProfile -File $rollbackSelfHealingScript ` + -Repository $TargetRepository ` + -Branch $TargetBranch ` + -Channel ([string]$RollbackPolicy.channel) ` + -RequiredHistoryCount ([int]$RollbackPolicy.required_history_count) ` + -ReleaseLimit ([int]$RollbackPolicy.release_limit) ` + -AutoRemediate:$true ` + -ReleaseWorkflowFile ([string]$RollbackPolicy.release_workflow) ` + -MaxAttempts ([int]$RollbackPolicy.max_attempts) ` + -WatchTimeoutMinutes ([int]$RollbackPolicy.watch_timeout_minutes) ` + -CanarySequenceMin ([int]$RollbackPolicy.canary_sequence_min) ` + -CanarySequenceMax ([int]$RollbackPolicy.canary_sequence_max) ` + -CanaryTagFamily 'semver' ` + -OutputPath $rollbackReportPath | Out-Null + $exitCode = if ($null -eq $LASTEXITCODE) { 0 } else { [int]$LASTEXITCODE } + } catch { + $executionError = [string]$_.Exception.Message + $exitCode = 1 + } + + $rollbackReport = $null + if (Test-Path -LiteralPath $rollbackReportPath -PathType Leaf) { + $rollbackReport = Get-Content -LiteralPath $rollbackReportPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + + if ($null -eq $rollbackReport) { + $rollbackReport = [ordered]@{ + status = 'fail' + reason_code = 'rollback_orchestration_report_missing' + message = if ([string]::IsNullOrWhiteSpace($executionError)) { 'rollback orchestration report missing.' 
} else { $executionError } + } + } + + if (-not [string]::IsNullOrWhiteSpace($executionError)) { + $rollbackReport.status = 'fail' + $rollbackReport.reason_code = 'rollback_orchestration_runtime_error' + $rollbackReport.message = $executionError + } + + return [ordered]@{ + status = if ($exitCode -eq 0 -and [string]$rollbackReport.status -eq 'pass') { 'pass' } else { 'fail' } + exit_code = $exitCode + report_path = $rollbackReportPath + report = $rollbackReport + } +} + $defaultSemverOnlyEnforceUtc = [DateTimeOffset]::Parse('2026-07-01T00:00:00Z') $workspaceGovernancePath = Join-Path (Split-Path -Parent $PSScriptRoot) 'workspace-governance.json' +$gaPolicy = Resolve-ControlPlaneGaPolicy -ManifestPath $workspaceGovernancePath +$script:opsControlPlanePolicySchemaVersion = [string]$gaPolicy.schema_version +$script:opsControlPlanePolicySource = [string]$gaPolicy.source +$script:controlPlaneStateMachinePolicy = $gaPolicy.state_machine +$script:rollbackOrchestrationPolicy = $gaPolicy.rollback_orchestration +$script:rollbackDrillPolicy = $gaPolicy.rollback_drill +foreach ($warning in @($gaPolicy.warnings)) { + Write-Warning "[control_plane_policy_warning] $warning" +} + $semverPolicy = Resolve-SemVerEnforcementPolicy -ManifestPath $workspaceGovernancePath -FallbackEnforceUtc $defaultSemverOnlyEnforceUtc $script:semverOnlyEnforceUtc = [DateTimeOffset]$semverPolicy.semver_only_enforce_utc $script:semverPolicySource = [string]$semverPolicy.source @@ -1594,6 +1926,30 @@ $report = [ordered]@{ keep_latest_canary_n = $KeepLatestCanaryN tag_strategy = 'semver' migration_mode = 'dual_mode_publish_semver_control_plane' + control_plane_policy_schema_version = [string]$script:opsControlPlanePolicySchemaVersion + control_plane_policy_source = [string]$script:opsControlPlanePolicySource + state_machine = [ordered]@{ + version = [string]$script:controlPlaneStateMachinePolicy.version + initial_state = [string]$script:controlPlaneStateMachinePolicy.initial_state + current_state = 
[string]$script:controlPlaneStateMachinePolicy.initial_state + terminal_states = @($script:controlPlaneStateMachinePolicy.terminal_states) + transitions_executed = @() + } + rollback_orchestration = [ordered]@{ + policy_enabled = [bool]$script:rollbackOrchestrationPolicy.enabled + policy_run_on_dry_run = [bool]$script:rollbackOrchestrationPolicy.run_on_dry_run + trigger_reason_codes = @($script:rollbackOrchestrationPolicy.trigger_reason_codes) + attempted = $false + status = 'not_run' + reason_code = '' + message = '' + report_path = '' + report = $null + decision = [ordered]@{ + should_attempt = $false + decision_reason = 'not_evaluated' + } + } semver_policy_source = $script:semverPolicySource semver_only_enforce_utc = $script:semverOnlyEnforceUtc.ToString('yyyy-MM-ddTHH:mm:ssZ') semver_only_enforced = [bool]$script:semverOnlyEnforced @@ -1622,6 +1978,13 @@ $report = [ordered]@{ } try { + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'start' ` + -Result 'enter' ` + -ToState ([string]$report.state_machine.initial_state) ` + -ReasonCode 'control_plane_start' + $preHealthPath = Join-Path $scratchRoot 'pre-health.json' $healthy = $false try { @@ -1641,6 +2004,22 @@ try { $report.pre_health = Get-Content -LiteralPath $preHealthPath -Raw | ConvertFrom-Json -ErrorAction Stop } + if ($healthy) { + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'ops_health_preflight' ` + -Result 'pass' ` + -ToState 'ops_health_verify' ` + -ReasonCode 'pre_health_pass' + } else { + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'ops_health_preflight' ` + -Result 'fail' ` + -ToState $(if ($AutoRemediate) { 'auto_remediation' } else { 'ops_health_verify' }) ` + -ReasonCode 'pre_health_fail' + } + if (-not $healthy -and $AutoRemediate) { $remediationPath = Join-Path $scratchRoot 'remediation.json' & pwsh -NoProfile -File $opsRemediateScript ` @@ -1651,6 +2030,12 @@ try { if
(Test-Path -LiteralPath $remediationPath -PathType Leaf) { $report.remediation = Get-Content -LiteralPath $remediationPath -Raw | ConvertFrom-Json -ErrorAction Stop } + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'auto_remediation' ` + -Result $(if ($null -ne $report.remediation -and [string]$report.remediation.status -eq 'pass') { 'pass' } else { 'fail' }) ` + -ToState 'ops_health_verify' ` + -ReasonCode $(if ($null -ne $report.remediation) { [string]$report.remediation.reason_code } else { 'remediation_report_missing' }) } $postHealthPath = Join-Path $scratchRoot 'post-health.json' @@ -1667,11 +2052,23 @@ try { if ([string]$report.post_health.status -ne 'pass') { throw "ops_unhealthy: reason_codes=$([string]::Join(',', @($report.post_health.reason_codes)))" } + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'ops_health_verify' ` + -Result 'pass' ` + -ToState 'release_dispatch' ` + -ReasonCode 'post_health_pass' if ($Mode -eq 'Validate') { $report.status = 'pass' $report.reason_code = if ($DryRun) { 'validate_dry_run' } else { 'validated' } $report.message = 'Release control plane validation completed without dispatch.' + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState ([string]$report.state_machine.current_state) ` + -Result 'pass' ` + -ToState 'completed' ` + -ReasonCode ([string]$report.reason_code) } else { $dateKey = (Get-Date).ToUniversalTime().ToString('yyyyMMdd') $executionList = [System.Collections.Generic.List[object]]::new() @@ -1712,6 +2109,12 @@ try { $report.status = 'pass' $report.reason_code = if ($DryRun) { 'dry_run' } else { 'completed' } $report.message = 'Release control plane completed.'
+ Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'release_dispatch' ` + -Result 'pass' ` + -ToState 'completed' ` + -ReasonCode ([string]$report.reason_code) } } catch { @@ -1719,6 +2122,77 @@ catch { $report.status = 'fail' $report.reason_code = Resolve-ControlPlaneFailureReasonCode -MessageText $failureMessage $report.message = $failureMessage + + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState ([string]$report.state_machine.current_state) ` + -Result 'fail' ` + -ToState 'rollback_orchestration' ` + -ReasonCode ([string]$report.reason_code) ` + -Detail $failureMessage + + $rollbackDecision = Should-AttemptRollbackOrchestration ` + -ReasonCode ([string]$report.reason_code) ` + -Policy $script:rollbackOrchestrationPolicy ` + -DryRunEnabled ([bool]$DryRun) ` + -AutoRemediateEnabled ([bool]$AutoRemediate) + $report.rollback_orchestration.decision = $rollbackDecision + + if ([bool]$rollbackDecision.should_attempt) { + $report.rollback_orchestration.attempted = $true + + try { + $rollbackResult = Invoke-ControlPlaneRollbackOrchestration ` + -TargetRepository $Repository ` + -TargetBranch ([string]$script:rollbackDrillPolicy.release_branch) ` + -RollbackPolicy $script:rollbackDrillPolicy ` + -ScratchRoot $scratchRoot + + $report.rollback_orchestration.status = [string]$rollbackResult.status + $report.rollback_orchestration.report_path = [string]$rollbackResult.report_path + $report.rollback_orchestration.report = $rollbackResult.report + $report.rollback_orchestration.reason_code = [string]$rollbackResult.report.reason_code + $report.rollback_orchestration.message = [string]$rollbackResult.report.message + + if ([string]$rollbackResult.status -eq 'pass') { + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'rollback_orchestration' ` + -Result 'pass' ` + -ToState 'failed_recovered' ` + -ReasonCode 'rollback_orchestration_recovered' + } else { + 
Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'rollback_orchestration' ` + -Result 'fail' ` + -ToState 'failed' ` + -ReasonCode ([string]$report.rollback_orchestration.reason_code) + } + } catch { + $report.rollback_orchestration.status = 'fail' + $report.rollback_orchestration.reason_code = 'rollback_orchestration_runtime_error' + $report.rollback_orchestration.message = [string]$_.Exception.Message + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'rollback_orchestration' ` + -Result 'fail' ` + -ToState 'failed' ` + -ReasonCode 'rollback_orchestration_runtime_error' ` + -Detail ([string]$_.Exception.Message) + } + } else { + $report.rollback_orchestration.attempted = $false + $report.rollback_orchestration.status = 'skipped' + $report.rollback_orchestration.reason_code = [string]$rollbackDecision.decision_reason + $report.rollback_orchestration.message = 'Rollback orchestration skipped by policy decision.' + Add-ControlPlaneStateTransition ` + -StateMachine $report.state_machine ` + -FromState 'rollback_orchestration' ` + -Result 'fail' ` + -ToState 'failed' ` + -ReasonCode ([string]$rollbackDecision.decision_reason) + } } finally { Write-WorkflowOpsReport -Report $report -OutputPath $OutputPath | Out-Null diff --git a/scripts/Test-OpsSloGate.ps1 b/scripts/Test-OpsSloGate.ps1 index 7214273..8383660 100644 --- a/scripts/Test-OpsSloGate.ps1 +++ b/scripts/Test-OpsSloGate.ps1 @@ -21,6 +21,18 @@ param( [ValidateRange(1, 168)] [int]$SyncGuardMaxAgeHours = 12, + [Parameter()] + [ValidateRange(1, 90)] + [int]$ErrorBudgetWindowDays = 7, + + [Parameter()] + [ValidateRange(0, 10000)] + [int]$ErrorBudgetMaxFailedRuns = 0, + + [Parameter()] + [ValidateRange(0, 100)] + [double]$ErrorBudgetMaxFailureRatePct = 0, + [Parameter()] [ValidateNotNullOrEmpty()] [string[]]$RequiredWorkflows = @( @@ -57,6 +69,16 @@ $report = [ordered]@{ lookback_days = $LookbackDays min_success_rate_pct = $MinSuccessRatePct 
sync_guard_max_age_hours = $SyncGuardMaxAgeHours + error_budget = [ordered]@{ + lookback_days = $ErrorBudgetWindowDays + max_failed_runs = $ErrorBudgetMaxFailedRuns + max_failure_rate_pct = $ErrorBudgetMaxFailureRatePct + total_completed_runs = 0 + total_failed_runs = 0 + failure_rate_pct = 0 + status = 'unknown' + reason_codes = @() + } required_workflows = @($RequiredWorkflows) status = 'fail' reason_codes = @() @@ -91,6 +113,20 @@ try { $sloReport = Get-Content -LiteralPath $sloPath -Raw | ConvertFrom-Json -ErrorAction Stop $report.source_slo_report = $sloReport + $errorBudgetSloReport = $sloReport + if ($ErrorBudgetWindowDays -ne $LookbackDays) { + $errorBudgetPath = Join-Path $scratchRoot 'error-budget-ops-slo-report.json' + & pwsh -NoProfile -File $sloScript ` + -SurfaceRepository $SurfaceRepository ` + -SyncGuardRepository $SyncGuardRepository ` + -LookbackDays $ErrorBudgetWindowDays ` + -OutputPath $errorBudgetPath + if ($LASTEXITCODE -ne 0) { + throw "error_budget_report_generation_failed: exit_code=$LASTEXITCODE" + } + $errorBudgetSloReport = Get-Content -LiteralPath $errorBudgetPath -Raw | ConvertFrom-Json -ErrorAction Stop + } + $workflowEvaluations = [System.Collections.Generic.List[object]]::new() foreach ($workflowName in @($RequiredWorkflows)) { $candidate = @($sloReport.workflows | Where-Object { [string]$_.workflow -eq [string]$workflowName } | Select-Object -First 1) @@ -158,6 +194,39 @@ try { } $report.sync_guard_evaluation = $syncGuardEvaluation + $errorBudgetReasons = [System.Collections.Generic.List[string]]::new() + $totalCompletedRuns = 0 + $totalFailedRuns = 0 + foreach ($workflowName in @($RequiredWorkflows)) { + $errorBudgetRecord = @($errorBudgetSloReport.workflows | Where-Object { [string]$_.workflow -eq [string]$workflowName } | Select-Object -First 1) + if (@($errorBudgetRecord).Count -ne 1) { + continue + } + $totalCompletedRuns += [int]$errorBudgetRecord[0].completed_runs + $totalFailedRuns += 
[int]$errorBudgetRecord[0].failure_runs + } + + $failureRatePct = if ($totalCompletedRuns -le 0) { 0.0 } else { [Math]::Round((($totalFailedRuns / $totalCompletedRuns) * 100), 2) } + if ($totalFailedRuns -gt $ErrorBudgetMaxFailedRuns) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'error_budget_exhausted' + [void]$errorBudgetReasons.Add('max_failed_runs_exceeded') + } + if ($failureRatePct -gt $ErrorBudgetMaxFailureRatePct) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'error_budget_failure_rate_exceeded' + [void]$errorBudgetReasons.Add('max_failure_rate_pct_exceeded') + } + + $report.error_budget = [ordered]@{ + lookback_days = $ErrorBudgetWindowDays + max_failed_runs = $ErrorBudgetMaxFailedRuns + max_failure_rate_pct = $ErrorBudgetMaxFailureRatePct + total_completed_runs = $totalCompletedRuns + total_failed_runs = $totalFailedRuns + failure_rate_pct = $failureRatePct + status = if (@($errorBudgetReasons).Count -eq 0) { 'pass' } else { 'fail' } + reason_codes = @($errorBudgetReasons) + } + if ($reasonCodes.Count -eq 0) { $report.status = 'pass' $report.reason_codes = @('ok') diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index ad317e7..d0e214c 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -169,6 +169,7 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) Add-Check -Scope 'manifest' -Name 'release_client_runtime_images_ops_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) Add-Check -Scope 
'manifest' -Name 'release_client_ops_policy_exists' -Passed ($null -ne $releaseClient.ops_control_plane_policy) -Detail 'installer_contract.release_client.ops_control_plane_policy' + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_schema_version' -Passed ([string]$releaseClient.ops_control_plane_policy.schema_version -eq '2.0') -Detail ([string]$releaseClient.ops_control_plane_policy.schema_version) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) @@ -180,6 +181,17 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_sync_guard_missing' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_missing') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_sync_guard_stale' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_stale') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'slo_gate_runtime_error') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_error_budget_window_days' -Passed ([int]$releaseClient.ops_control_plane_policy.error_budget.window_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.window_days) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_error_budget_max_failed_runs' -Passed ([int]$releaseClient.ops_control_plane_policy.error_budget.max_failed_runs -eq 0) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.max_failed_runs) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_error_budget_max_failure_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.error_budget.max_failure_rate_pct -eq 0) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.max_failure_rate_pct) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_error_budget_critical_burn_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.error_budget.critical_burn_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.critical_burn_rate_pct) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_state_machine_version' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.version -eq '1.0') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.version) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_state_machine_initial_state' -Passed 
([string]$releaseClient.ops_control_plane_policy.state_machine.initial_state -eq 'ops_health_preflight') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.initial_state) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_state_machine_preflight_on_pass' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_pass -eq 'release_dispatch') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_pass) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_state_machine_preflight_on_fail' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_fail -eq 'auto_remediation') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_fail) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_trigger_watch_timeout' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_dispatch_watch_timeout') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_trigger_release_verification_failed' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_verification_failed') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_monitoring' 
-Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 1f33622..13c0449 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -92,6 +92,7 @@ if ($null -ne $releaseClient) { Add-Check -Name 'runtime_images_ops_runtime_base_repository' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_repository -eq 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_repository) Add-Check -Name 'runtime_images_ops_runtime_base_digest' -Passed ([string]$releaseClient.runtime_images.ops_runtime.base_digest -eq 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423') -Detail ([string]$releaseClient.runtime_images.ops_runtime.base_digest) Add-Check -Name 'ops_control_plane_policy_exists' -Passed ($null -ne $releaseClient.ops_control_plane_policy) -Detail 'installer_contract.release_client.ops_control_plane_policy' + Add-Check -Name 'ops_policy_schema_version' -Passed ([string]$releaseClient.ops_control_plane_policy.schema_version -eq '2.0') -Detail 
([string]$releaseClient.ops_control_plane_policy.schema_version) Add-Check -Name 'ops_policy_slo_lookback_days' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days -eq 7) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.lookback_days) Add-Check -Name 'ops_policy_slo_min_success_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct) Add-Check -Name 'ops_policy_slo_max_sync_guard_age_hours' -Passed ([int]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours -eq 12) -Detail ([string]$releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours) @@ -103,6 +104,17 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_sync_guard_missing' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_missing') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_sync_guard_stale' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'sync_guard_stale') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) Add-Check -Name 'ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) -contains 'slo_gate_runtime_error') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes))) + Add-Check -Name 'ops_policy_error_budget_window_days' -Passed ([int]$releaseClient.ops_control_plane_policy.error_budget.window_days -eq 7) -Detail 
([string]$releaseClient.ops_control_plane_policy.error_budget.window_days) + Add-Check -Name 'ops_policy_error_budget_max_failed_runs' -Passed ([int]$releaseClient.ops_control_plane_policy.error_budget.max_failed_runs -eq 0) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.max_failed_runs) + Add-Check -Name 'ops_policy_error_budget_max_failure_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.error_budget.max_failure_rate_pct -eq 0) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.max_failure_rate_pct) + Add-Check -Name 'ops_policy_error_budget_critical_burn_rate_pct' -Passed ([double]$releaseClient.ops_control_plane_policy.error_budget.critical_burn_rate_pct -eq 100) -Detail ([string]$releaseClient.ops_control_plane_policy.error_budget.critical_burn_rate_pct) + Add-Check -Name 'ops_policy_state_machine_version' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.version -eq '1.0') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.version) + Add-Check -Name 'ops_policy_state_machine_initial_state' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.initial_state -eq 'ops_health_preflight') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.initial_state) + Add-Check -Name 'ops_policy_state_machine_transition_release_dispatch_on_pass' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_pass -eq 'release_dispatch') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_pass) + Add-Check -Name 'ops_policy_state_machine_transition_auto_remediation_on_fail' -Passed ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_fail -eq 'auto_remediation') -Detail ([string]$releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_fail) + Add-Check -Name 
'ops_policy_rollback_orchestration_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) + Add-Check -Name 'ops_policy_rollback_orchestration_trigger_release_dispatch_watch_timeout' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_dispatch_watch_timeout') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) + Add-Check -Name 'ops_policy_rollback_orchestration_trigger_release_verification_failed' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_verification_failed') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) Add-Check -Name 'ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index 129c5d9..e9a4d56 100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ 
b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -92,6 +92,60 @@ try { if (-not $opsPolicyPresent) { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_policy_missing' } else { + $policySchemaVersionValid = ([string]$releaseClient.ops_control_plane_policy.schema_version -eq '2.0') + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_schema_version_valid' + passed = $policySchemaVersionValid + }) | Out-Null + if (-not $policySchemaVersionValid) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_schema_version_invalid' + } + + $stateMachinePresent = ($null -ne $releaseClient.ops_control_plane_policy.state_machine) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_state_machine_present' + passed = $stateMachinePresent + }) | Out-Null + if (-not $stateMachinePresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_state_machine_missing' + } else { + $stateMachineVersionPresent = (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.state_machine.version)) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_state_machine_version_present' + passed = $stateMachineVersionPresent + }) | Out-Null + if (-not $stateMachineVersionPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_state_machine_version_missing' + } + } + + $rollbackOrchestrationPresent = ($null -ne $releaseClient.ops_control_plane_policy.rollback_orchestration) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_rollback_orchestration_present' + passed = $rollbackOrchestrationPresent + }) | Out-Null + if (-not $rollbackOrchestrationPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_rollback_orchestration_missing' + } + + $errorBudgetPresent = ($null -ne $releaseClient.ops_control_plane_policy.error_budget) + $checks.Add([ordered]@{ + check = 
'release_client_ops_control_plane_policy_error_budget_present' + passed = $errorBudgetPresent + }) | Out-Null + if (-not $errorBudgetPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_error_budget_missing' + } else { + $errorBudgetWindowValid = ([int]$releaseClient.ops_control_plane_policy.error_budget.window_days -ge 1) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_error_budget_window_days_valid' + passed = $errorBudgetWindowValid + }) | Out-Null + if (-not $errorBudgetWindowValid) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_error_budget_window_days_invalid' + } + } + $sloAlertThresholdsPresent = ($null -ne $releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds) $checks.Add([ordered]@{ check = 'release_client_ops_control_plane_policy_slo_alert_thresholds_present' diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 4c83ed2..9726b97 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -39,6 +39,12 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'release_client_drift' $script:runtimeContent | Should -Match 'runtime_images_missing' $script:runtimeContent | Should -Match 'ops_control_plane_policy_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_schema_version_invalid' + $script:runtimeContent | Should -Match 'ops_control_plane_state_machine_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_state_machine_version_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_rollback_orchestration_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_error_budget_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_error_budget_window_days_invalid' $script:runtimeContent | Should -Match 'ops_control_plane_slo_alert_thresholds_missing' 
$script:runtimeContent | Should -Match 'ops_control_plane_self_healing_missing' $script:runtimeContent | Should -Match 'ops_control_plane_guardrails_missing' diff --git a/tests/OpsSloGateWorkflowContract.Tests.ps1 b/tests/OpsSloGateWorkflowContract.Tests.ps1 index 0a3a039..7624224 100644 --- a/tests/OpsSloGateWorkflowContract.Tests.ps1 +++ b/tests/OpsSloGateWorkflowContract.Tests.ps1 @@ -27,6 +27,9 @@ Describe 'Ops SLO gate workflow contract' { $script:workflowContent | Should -Match 'lookback_days' $script:workflowContent | Should -Match 'min_success_rate_pct' $script:workflowContent | Should -Match 'sync_guard_max_age_hours' + $script:workflowContent | Should -Match 'error_budget_window_days' + $script:workflowContent | Should -Match 'error_budget_max_failed_runs' + $script:workflowContent | Should -Match 'error_budget_max_failure_rate_pct' $script:workflowContent | Should -Match 'auto_self_heal' $script:workflowContent | Should -Match 'self_heal_max_attempts' $script:workflowContent | Should -Match 'self_heal_watch_timeout_minutes' @@ -55,6 +58,9 @@ Describe 'Ops SLO gate workflow contract' { $script:runtimeContent | Should -Match 'workflow_success_rate_below_threshold' $script:runtimeContent | Should -Match 'sync_guard_stale' $script:runtimeContent | Should -Match 'sync_guard_missing' + $script:runtimeContent | Should -Match 'error_budget_exhausted' + $script:runtimeContent | Should -Match 'error_budget_failure_rate_exceeded' + $script:runtimeContent | Should -Match 'error_budget' } It 'runs bounded SLO self-healing playbook with deterministic outcomes' { @@ -64,6 +70,9 @@ Describe 'Ops SLO gate workflow contract' { $script:selfHealingContent | Should -Match '\$dispatchInputs = @\(' $script:selfHealingContent | Should -Match '-Inputs \$dispatchInputs' $script:selfHealingContent | Should -Match 'sync_guard_max_age_hours' + $script:selfHealingContent | Should -Match 'ErrorBudgetWindowDays' + $script:selfHealingContent | Should -Match 'ErrorBudgetMaxFailedRuns' + 
$script:selfHealingContent | Should -Match 'ErrorBudgetMaxFailureRatePct' $script:selfHealingContent | Should -Match 'warning_min_success_rate_pct' $script:selfHealingContent | Should -Match 'critical_min_success_rate_pct' $script:selfHealingContent | Should -Match 'alert_severity' diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index 272c843..e3d594c 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -52,6 +52,7 @@ Describe 'Release client policy contract' { $releaseClient.runtime_images.ops_runtime.repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-surface-ops' $releaseClient.runtime_images.ops_runtime.base_repository | Should -Be 'ghcr.io/labview-community-ci-cd/labview-cdev-cli-runtime' $releaseClient.runtime_images.ops_runtime.base_digest | Should -Be 'sha256:0506e8789680ce1c941ca9f005b75d804150aed6ad36a5ac59458b802d358423' + $releaseClient.ops_control_plane_policy.schema_version | Should -Be '2.0' $releaseClient.ops_control_plane_policy.slo_gate.lookback_days | Should -Be 7 $releaseClient.ops_control_plane_policy.slo_gate.min_success_rate_pct | Should -Be 100 $releaseClient.ops_control_plane_policy.slo_gate.max_sync_guard_age_hours | Should -Be 12 @@ -63,6 +64,19 @@ Describe 'Release client policy contract' { @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'sync_guard_missing' @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'sync_guard_stale' @($releaseClient.ops_control_plane_policy.slo_gate.alert_thresholds.critical_reason_codes) | Should -Contain 'slo_gate_runtime_error' + $releaseClient.ops_control_plane_policy.error_budget.window_days | Should -Be 7 + $releaseClient.ops_control_plane_policy.error_budget.max_failed_runs | Should -Be 0 + 
$releaseClient.ops_control_plane_policy.error_budget.max_failure_rate_pct | Should -Be 0 + $releaseClient.ops_control_plane_policy.error_budget.critical_burn_rate_pct | Should -Be 100 + $releaseClient.ops_control_plane_policy.state_machine.version | Should -Be '1.0' + $releaseClient.ops_control_plane_policy.state_machine.initial_state | Should -Be 'ops_health_preflight' + @($releaseClient.ops_control_plane_policy.state_machine.terminal_states) | Should -Contain 'completed' + @($releaseClient.ops_control_plane_policy.state_machine.terminal_states) | Should -Contain 'failed' + $releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_pass | Should -Be 'release_dispatch' + $releaseClient.ops_control_plane_policy.state_machine.transitions.ops_health_preflight.on_fail | Should -Be 'auto_remediation' + $releaseClient.ops_control_plane_policy.rollback_orchestration.enabled | Should -BeTrue + @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) | Should -Contain 'release_dispatch_watch_timeout' + @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) | Should -Contain 'release_verification_failed' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-monitoring' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-autoremediate' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'release-control-plane' @@ -118,9 +132,13 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'runtime_images_cdev_cli_runtime_canonical_repository' $script:policyScriptContent | Should -Match 'runtime_images_ops_runtime_base_digest' $script:policyScriptContent | Should -Match 'ops_control_plane_policy_exists' + $script:policyScriptContent | Should -Match 'ops_policy_schema_version' $script:policyScriptContent | Should -Match 
'ops_policy_slo_min_success_rate_pct' $script:policyScriptContent | Should -Match 'ops_policy_slo_alert_thresholds_warning_min_success_rate_pct' $script:policyScriptContent | Should -Match 'ops_policy_slo_alert_thresholds_critical_reason_slo_gate_runtime_error' + $script:policyScriptContent | Should -Match 'ops_policy_error_budget_window_days' + $script:policyScriptContent | Should -Match 'ops_policy_state_machine_version' + $script:policyScriptContent | Should -Match 'ops_policy_rollback_orchestration_enabled' $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_full_cycle_weekday_monday' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_pattern_exists' diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index 6422d53..bfb0bca 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -62,6 +62,10 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 'Get-ReleaseByTagOrNull' $script:runtimeContent | Should -Match 'Resolve-SemVerEnforcementPolicy' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowPolicy' + $script:runtimeContent | Should -Match 'Resolve-ControlPlaneGaPolicy' + $script:runtimeContent | Should -Match 'Add-ControlPlaneStateTransition' + $script:runtimeContent | Should -Match 'Should-AttemptRollbackOrchestration' + $script:runtimeContent | Should -Match 'Invoke-ControlPlaneRollbackOrchestration' $script:runtimeContent | Should -Match 'Resolve-StablePromotionWindowDecision' $script:runtimeContent | Should -Match 'Write-StableOverrideAuditReport' $script:runtimeContent | Should -Match 'Resolve-ControlPlaneFailureReasonCode' @@ -82,6 +86,11 @@ Describe 'Release control plane workflow contract' { $script:runtimeContent | Should -Match 
'release_verification_asset_missing' $script:runtimeContent | Should -Match 'release_verification_manifest_channel_mismatch' $script:runtimeContent | Should -Match 'release_verification_failed' + $script:runtimeContent | Should -Match 'rollback_orchestration' + $script:runtimeContent | Should -Match 'rollback_orchestration_recovered' + $script:runtimeContent | Should -Match 'rollback_orchestration_runtime_error' + $script:runtimeContent | Should -Match 'state_machine' + $script:runtimeContent | Should -Match 'control_plane_policy_schema_version' $script:runtimeContent | Should -Match 'promotion_lineage_invalid' $script:runtimeContent | Should -Match 'promotion_source_missing' $script:runtimeContent | Should -Match 'promotion_source_asset_missing' diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 7346be1..48178a7 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -254,6 +254,7 @@ } }, "ops_control_plane_policy": { + "schema_version": "2.0", "slo_gate": { "lookback_days": 7, "min_success_rate_pct": 100, @@ -278,6 +279,54 @@ "release-control-plane" ] }, + "error_budget": { + "window_days": 7, + "max_failed_runs": 0, + "max_failure_rate_pct": 0, + "critical_burn_rate_pct": 100 + }, + "state_machine": { + "version": "1.0", + "initial_state": "ops_health_preflight", + "terminal_states": [ + "completed", + "failed" + ], + "transitions": { + "ops_health_preflight": { + "on_pass": "release_dispatch", + "on_fail": "auto_remediation" + }, + "auto_remediation": { + "on_pass": "ops_health_verify", + "on_fail": "rollback_orchestration" + }, + "ops_health_verify": { + "on_pass": "release_dispatch", + "on_fail": "rollback_orchestration" + }, + "release_dispatch": { + "on_pass": "completed", + "on_fail": "rollback_orchestration" + }, + 
"rollback_orchestration": { + "on_pass": "failed_recovered", + "on_fail": "failed" + } + } + }, + "rollback_orchestration": { + "enabled": true, + "trigger_reason_codes": [ + "ops_health_gate_failed", + "ops_unhealthy", + "release_dispatch_watch_timeout", + "release_dispatch_watch_failed", + "release_dispatch_attempts_exhausted", + "release_verification_failed" + ], + "run_on_dry_run": false + }, "incident_lifecycle": { "auto_close_on_recovery": true, "reopen_on_regression": true, diff --git a/workspace-governance.json b/workspace-governance.json index 7346be1..48178a7 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -254,6 +254,7 @@ } }, "ops_control_plane_policy": { + "schema_version": "2.0", "slo_gate": { "lookback_days": 7, "min_success_rate_pct": 100, @@ -278,6 +279,54 @@ "release-control-plane" ] }, + "error_budget": { + "window_days": 7, + "max_failed_runs": 0, + "max_failure_rate_pct": 0, + "critical_burn_rate_pct": 100 + }, + "state_machine": { + "version": "1.0", + "initial_state": "ops_health_preflight", + "terminal_states": [ + "completed", + "failed" + ], + "transitions": { + "ops_health_preflight": { + "on_pass": "release_dispatch", + "on_fail": "auto_remediation" + }, + "auto_remediation": { + "on_pass": "ops_health_verify", + "on_fail": "rollback_orchestration" + }, + "ops_health_verify": { + "on_pass": "release_dispatch", + "on_fail": "rollback_orchestration" + }, + "release_dispatch": { + "on_pass": "completed", + "on_fail": "rollback_orchestration" + }, + "rollback_orchestration": { + "on_pass": "failed_recovered", + "on_fail": "failed" + } + } + }, + "rollback_orchestration": { + "enabled": true, + "trigger_reason_codes": [ + "ops_health_gate_failed", + "ops_unhealthy", + "release_dispatch_watch_timeout", + "release_dispatch_watch_failed", + "release_dispatch_attempts_exhausted", + "release_verification_failed" + ], + "run_on_dry_run": false + }, "incident_lifecycle": { "auto_close_on_recovery": true, 
"reopen_on_regression": true, From 8930131e50db404eb4e7591f889258ccbbba2099 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 03:04:40 -0800 Subject: [PATCH 57/60] feat(control-plane): add deterministic decision-trail artifact contract --- .github/workflows/ci.yml | 1 + .github/workflows/release-control-plane.yml | 28 +++ AGENTS.md | 3 + README.md | 3 + scripts/Test-PolicyContracts.ps1 | 5 + scripts/Test-ReleaseClientContracts.ps1 | 5 + .../Test-ReleaseControlPlanePolicyDrift.ps1 | 18 ++ ...Write-ReleaseControlPlaneDecisionTrail.ps1 | 160 ++++++++++++++++++ .../OpsPolicyDriftWorkflowContract.Tests.ps1 | 2 + tests/ReleaseClientPolicyContract.Tests.ps1 | 7 + ...ontrolPlaneDecisionTrailContract.Tests.ps1 | 36 ++++ ...easeControlPlaneWorkflowContract.Tests.ps1 | 2 + .../workspace-governance.json | 7 + workspace-governance.json | 7 + 14 files changed, 284 insertions(+) create mode 100644 scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 create mode 100644 tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42fbcdd..a05e6b6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,6 +87,7 @@ jobs: './tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1', './tests/BranchProtectionDriftWorkflowContract.Tests.ps1', './tests/ReleaseControlPlaneWorkflowContract.Tests.ps1', + './tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1', './tests/ReleaseRaceHardeningGateWorkflowContract.Tests.ps1', './tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1', './tests/ReleaseRollbackDrillWorkflowContract.Tests.ps1', diff --git a/.github/workflows/release-control-plane.yml b/.github/workflows/release-control-plane.yml index 7a94c61..d4d5751 100644 --- a/.github/workflows/release-control-plane.yml +++ b/.github/workflows/release-control-plane.yml @@ -162,6 +162,26 @@ jobs: -OverrideAuditOutputPath $overrideAuditReportPath ` -OutputPath $reportPath + - 
name: Build release control plane decision trail + if: always() + shell: pwsh + env: + RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + $ErrorActionPreference = 'Stop' + $reportPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-report.json' + $decisionTrailPath = Join-Path $env:RUNNER_TEMP 'release-control-plane-decision-trail.json' + + & pwsh -NoProfile -File ./scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 ` + -ReportPath $reportPath ` + -Repository '${{ github.repository }}' ` + -Workflow 'release-control-plane.yml' ` + -RunId '${{ github.run_id }}' ` + -RunUrl $env:RUN_URL ` + -Branch '${{ github.ref_name }}' ` + -HeadSha '${{ github.sha }}' ` + -OutputPath $decisionTrailPath + - name: Upload release control plane report if: always() uses: actions/upload-artifact@v4 @@ -170,6 +190,14 @@ jobs: path: ${{ runner.temp }}/release-control-plane-report.json if-no-files-found: error + - name: Upload release control plane decision trail + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-control-plane-decision-trail-${{ github.run_id }} + path: ${{ runner.temp }}/release-control-plane-decision-trail.json + if-no-files-found: error + - name: Upload release control plane stable override audit if: always() uses: actions/upload-artifact@v4 diff --git a/AGENTS.md b/AGENTS.md index 0165a7b..f3877c9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -216,6 +216,7 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `ops_control_plane_policy.schema_version` is required and currently pinned to `2.0`. - `ops_control_plane_policy.state_machine` is required and must emit runtime transition evidence in `release-control-plane-report.json`. - `ops_control_plane_policy.rollback_orchestration` is required and controls deterministic rollback self-healing trigger behavior. 
+- `ops_control_plane_policy.decision_trail` is required and controls deterministic decision-trail evidence emission (`release-control-plane-decision-trail.json`). - Control-plane mode contract: - `Validate` - `CanaryCycle` @@ -304,6 +305,8 @@ Build and gate lanes must run in isolated workspaces on every run (`D:\dev` pref - `ops_control_plane_state_machine_missing` - `ops_control_plane_state_machine_version_missing` - `ops_control_plane_rollback_orchestration_missing` + - `ops_control_plane_decision_trail_missing` + - `ops_control_plane_decision_trail_schema_version_missing` - `ops_control_plane_error_budget_missing` - `ops_control_plane_error_budget_window_days_invalid` - `ops_control_plane_slo_alert_thresholds_missing` diff --git a/README.md b/README.md index 2d5162d..8d95e06 100644 --- a/README.md +++ b/README.md @@ -411,6 +411,7 @@ Control-plane behavior: 12. Emits deterministic migration warnings when legacy `v0.YYYYMMDD.N` tags are still present before the gate and fails with `semver_only_enforcement_violation` after the gate. 13. Loads GA policy contract `installer_contract.release_client.ops_control_plane_policy.schema_version=2.0` and emits state-machine execution evidence (`state_machine.transitions_executed`) in every report. 14. Executes deterministic rollback orchestration (`Invoke-RollbackDrillSelfHealing.ps1`) when configured trigger reason codes are hit. +15. Emits deterministic decision-trail evidence artifact `release-control-plane-decision-trail.json` (report hash + state-machine + rollback evidence fingerprint). 
Top-level release-control-plane deterministic failure reason codes include: - `ops_health_gate_failed` @@ -471,6 +472,8 @@ Underlying SLO evaluator `scripts/Test-OpsSloGate.ps1` still emits deterministic - `ops_control_plane_state_machine_missing` - `ops_control_plane_state_machine_version_missing` - `ops_control_plane_rollback_orchestration_missing` + - `ops_control_plane_decision_trail_missing` + - `ops_control_plane_decision_trail_schema_version_missing` - `ops_control_plane_error_budget_missing` - `ops_control_plane_error_budget_window_days_invalid` - `ops_control_plane_slo_alert_thresholds_missing` diff --git a/scripts/Test-PolicyContracts.ps1 b/scripts/Test-PolicyContracts.ps1 index d0e214c..f4293ee 100644 --- a/scripts/Test-PolicyContracts.ps1 +++ b/scripts/Test-PolicyContracts.ps1 @@ -192,6 +192,11 @@ if ($installerContractMembers -contains 'release_client') { Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_trigger_watch_timeout' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_dispatch_watch_timeout') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_rollback_orchestration_trigger_release_verification_failed' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_verification_failed') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_decision_trail_schema_version' -Passed 
([string]$releaseClient.ops_control_plane_policy.decision_trail.schema_version -eq '1.0') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.schema_version) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_decision_trail_artifact_name_prefix' -Passed ([string]$releaseClient.ops_control_plane_policy.decision_trail.artifact_name_prefix -eq 'release-control-plane-decision-trail') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.artifact_name_prefix) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_decision_trail_hash_algorithm' -Passed ([string]$releaseClient.ops_control_plane_policy.decision_trail.hash_algorithm -eq 'sha256') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.hash_algorithm) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_decision_trail_include_state_machine' -Passed ([bool]$releaseClient.ops_control_plane_policy.decision_trail.include_state_machine) -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.include_state_machine) + Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_decision_trail_include_rollback_orchestration' -Passed ([bool]$releaseClient.ops_control_plane_policy.decision_trail.include_rollback_orchestration) -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.include_rollback_orchestration) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', 
@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Scope 'manifest' -Name 'release_client_ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) diff --git a/scripts/Test-ReleaseClientContracts.ps1 b/scripts/Test-ReleaseClientContracts.ps1 index 13c0449..3b377e5 100644 --- a/scripts/Test-ReleaseClientContracts.ps1 +++ b/scripts/Test-ReleaseClientContracts.ps1 @@ -115,6 +115,11 @@ if ($null -ne $releaseClient) { Add-Check -Name 'ops_policy_rollback_orchestration_enabled' -Passed ([bool]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) -Detail ([string]$releaseClient.ops_control_plane_policy.rollback_orchestration.enabled) Add-Check -Name 'ops_policy_rollback_orchestration_trigger_release_dispatch_watch_timeout' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_dispatch_watch_timeout') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) Add-Check -Name 'ops_policy_rollback_orchestration_trigger_release_verification_failed' -Passed (@($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) -contains 'release_verification_failed') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes))) + Add-Check -Name 'ops_policy_decision_trail_schema_version' -Passed ([string]$releaseClient.ops_control_plane_policy.decision_trail.schema_version -eq '1.0') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.schema_version) + Add-Check -Name 'ops_policy_decision_trail_artifact_name_prefix' -Passed 
([string]$releaseClient.ops_control_plane_policy.decision_trail.artifact_name_prefix -eq 'release-control-plane-decision-trail') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.artifact_name_prefix) + Add-Check -Name 'ops_policy_decision_trail_hash_algorithm' -Passed ([string]$releaseClient.ops_control_plane_policy.decision_trail.hash_algorithm -eq 'sha256') -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.hash_algorithm) + Add-Check -Name 'ops_policy_decision_trail_include_state_machine' -Passed ([bool]$releaseClient.ops_control_plane_policy.decision_trail.include_state_machine) -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.include_state_machine) + Add-Check -Name 'ops_policy_decision_trail_include_rollback_orchestration' -Passed ([bool]$releaseClient.ops_control_plane_policy.decision_trail.include_rollback_orchestration) -Detail ([string]$releaseClient.ops_control_plane_policy.decision_trail.include_rollback_orchestration) Add-Check -Name 'ops_policy_slo_required_workflow_ops_monitoring' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-monitoring') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_ops_autoremediate' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'ops-autoremediate') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) Add-Check -Name 'ops_policy_slo_required_workflow_release_control_plane' -Passed (@($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) -contains 'release-control-plane') -Detail ([string]::Join(',', @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows))) diff --git a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 index e9a4d56..9899134 
100644 --- a/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 +++ b/scripts/Test-ReleaseControlPlanePolicyDrift.ps1 @@ -128,6 +128,24 @@ try { Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_rollback_orchestration_missing' } + $decisionTrailPresent = ($null -ne $releaseClient.ops_control_plane_policy.decision_trail) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_decision_trail_present' + passed = $decisionTrailPresent + }) | Out-Null + if (-not $decisionTrailPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_decision_trail_missing' + } else { + $decisionTrailSchemaVersionPresent = (-not [string]::IsNullOrWhiteSpace([string]$releaseClient.ops_control_plane_policy.decision_trail.schema_version)) + $checks.Add([ordered]@{ + check = 'release_client_ops_control_plane_policy_decision_trail_schema_version_present' + passed = $decisionTrailSchemaVersionPresent + }) | Out-Null + if (-not $decisionTrailSchemaVersionPresent) { + Add-ReasonCode -Target $reasonCodes -ReasonCode 'ops_control_plane_decision_trail_schema_version_missing' + } + } + $errorBudgetPresent = ($null -ne $releaseClient.ops_control_plane_policy.error_budget) $checks.Add([ordered]@{ check = 'release_client_ops_control_plane_policy_error_budget_present' diff --git a/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 b/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 new file mode 100644 index 0000000..5cbdd7a --- /dev/null +++ b/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 @@ -0,0 +1,160 @@ +#Requires -Version 7.0 +[CmdletBinding()] +param( + [Parameter(Mandatory = $true)] + [string]$ReportPath, + + [Parameter()] + [string]$Repository = '', + + [Parameter()] + [string]$Workflow = 'release-control-plane.yml', + + [Parameter()] + [string]$RunId = '', + + [Parameter()] + [string]$RunUrl = '', + + [Parameter()] + [string]$Branch = '', + + [Parameter()] + [string]$HeadSha = '', + + [Parameter()] + [string]$OutputPath = '' +) + 
+Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +. (Join-Path $PSScriptRoot 'lib/WorkflowOps.Common.ps1') + +function Get-Sha256HexFromText { + param( + [Parameter(Mandatory = $true)][string]$Text + ) + + $sha = [System.Security.Cryptography.SHA256]::Create() + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($Text) + $hash = $sha.ComputeHash($bytes) + return [string]::Join('', ($hash | ForEach-Object { $_.ToString('x2') })) + } finally { + $sha.Dispose() + } +} + +if (-not (Test-Path -LiteralPath $ReportPath -PathType Leaf)) { + throw "control_plane_report_missing: $ReportPath" +} + +$resolvedReportPath = [System.IO.Path]::GetFullPath($ReportPath) +$report = Get-Content -LiteralPath $resolvedReportPath -Raw | ConvertFrom-Json -Depth 100 +$reportSha256 = (Get-FileHash -LiteralPath $resolvedReportPath -Algorithm SHA256).Hash.ToLowerInvariant() + +$stateMachine = $null +if ($null -ne $report.state_machine) { + $stateMachine = [ordered]@{ + version = [string]$report.state_machine.version + initial_state = [string]$report.state_machine.initial_state + current_state = [string]$report.state_machine.current_state + terminal_states = @($report.state_machine.terminal_states | ForEach-Object { [string]$_ }) + transitions_executed = @( + $report.state_machine.transitions_executed | + ForEach-Object { + [ordered]@{ + timestamp_utc = [string]$_.timestamp_utc + from_state = [string]$_.from_state + result = [string]$_.result + to_state = [string]$_.to_state + reason_code = [string]$_.reason_code + detail = [string]$_.detail + } + } + ) + } +} + +$rollbackOrchestration = $null +if ($null -ne $report.rollback_orchestration) { + $rollbackOrchestration = [ordered]@{ + policy_enabled = [bool]$report.rollback_orchestration.policy_enabled + policy_run_on_dry_run = [bool]$report.rollback_orchestration.policy_run_on_dry_run + trigger_reason_codes = @($report.rollback_orchestration.trigger_reason_codes | ForEach-Object { [string]$_ }) + attempted = 
[bool]$report.rollback_orchestration.attempted + status = [string]$report.rollback_orchestration.status + reason_code = [string]$report.rollback_orchestration.reason_code + message = [string]$report.rollback_orchestration.message + decision = [ordered]@{ + should_attempt = [bool]$report.rollback_orchestration.decision.should_attempt + decision_reason = [string]$report.rollback_orchestration.decision.decision_reason + } + } +} + +$executionSummaries = @( + $report.executions | + ForEach-Object { + [ordered]@{ + mode = [string]$_.target_release.mode + status = [string]$_.target_release.status + reason_code = [string]$_.target_release.reason_code + tag = [string]$_.target_release.tag + } + } +) + +$decisionTrail = [ordered]@{ + schema_version = '1.0' + generated_at_utc = Get-UtcNowIso + run_context = [ordered]@{ + repository = if (-not [string]::IsNullOrWhiteSpace($Repository)) { $Repository } else { [string]$report.repository } + workflow = [string]$Workflow + run_id = [string]$RunId + run_url = [string]$RunUrl + branch = if (-not [string]::IsNullOrWhiteSpace($Branch)) { $Branch } else { [string]$report.branch } + head_sha = [string]$HeadSha + } + report = [ordered]@{ + path = $resolvedReportPath + sha256 = $reportSha256 + status = [string]$report.status + reason_code = [string]$report.reason_code + message = [string]$report.message + mode = [string]$report.mode + dry_run = [bool]$report.dry_run + control_plane_policy_schema_version = [string]$report.control_plane_policy_schema_version + control_plane_policy_source = [string]$report.control_plane_policy_source + } + decision_evidence = [ordered]@{ + state_machine = $stateMachine + rollback_orchestration = $rollbackOrchestration + stable_window_decision = [ordered]@{ + status = [string]$report.stable_promotion_window.decision.status + reason_code = [string]$report.stable_promotion_window.decision.reason_code + can_promote = [bool]$report.stable_promotion_window.decision.can_promote + current_utc_weekday = 
[string]$report.stable_promotion_window.decision.current_utc_weekday + } + executions = @($executionSummaries) + } +} + +$fingerprintPayload = [ordered]@{ + report_sha256 = [string]$decisionTrail.report.sha256 + report_status = [string]$decisionTrail.report.status + report_reason_code = [string]$decisionTrail.report.reason_code + mode = [string]$decisionTrail.report.mode + state_machine_current_state = if ($null -eq $decisionTrail.decision_evidence.state_machine) { '' } else { [string]$decisionTrail.decision_evidence.state_machine.current_state } + rollback_status = if ($null -eq $decisionTrail.decision_evidence.rollback_orchestration) { '' } else { [string]$decisionTrail.decision_evidence.rollback_orchestration.status } + rollback_reason_code = if ($null -eq $decisionTrail.decision_evidence.rollback_orchestration) { '' } else { [string]$decisionTrail.decision_evidence.rollback_orchestration.reason_code } +} + +$decisionTrail.signature = [ordered]@{ + algorithm = 'sha256' + payload = $fingerprintPayload + fingerprint = Get-Sha256HexFromText -Text ($fingerprintPayload | ConvertTo-Json -Depth 20 -Compress) +} + +Write-WorkflowOpsReport -Report $decisionTrail -OutputPath $OutputPath | Out-Null diff --git a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 index 9726b97..83f2e7a 100644 --- a/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 +++ b/tests/OpsPolicyDriftWorkflowContract.Tests.ps1 @@ -43,6 +43,8 @@ Describe 'Ops policy drift workflow contract' { $script:runtimeContent | Should -Match 'ops_control_plane_state_machine_missing' $script:runtimeContent | Should -Match 'ops_control_plane_state_machine_version_missing' $script:runtimeContent | Should -Match 'ops_control_plane_rollback_orchestration_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_decision_trail_missing' + $script:runtimeContent | Should -Match 'ops_control_plane_decision_trail_schema_version_missing' $script:runtimeContent | Should 
-Match 'ops_control_plane_error_budget_missing' $script:runtimeContent | Should -Match 'ops_control_plane_error_budget_window_days_invalid' $script:runtimeContent | Should -Match 'ops_control_plane_slo_alert_thresholds_missing' diff --git a/tests/ReleaseClientPolicyContract.Tests.ps1 b/tests/ReleaseClientPolicyContract.Tests.ps1 index e3d594c..0d05cab 100644 --- a/tests/ReleaseClientPolicyContract.Tests.ps1 +++ b/tests/ReleaseClientPolicyContract.Tests.ps1 @@ -77,6 +77,11 @@ Describe 'Release client policy contract' { $releaseClient.ops_control_plane_policy.rollback_orchestration.enabled | Should -BeTrue @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) | Should -Contain 'release_dispatch_watch_timeout' @($releaseClient.ops_control_plane_policy.rollback_orchestration.trigger_reason_codes) | Should -Contain 'release_verification_failed' + $releaseClient.ops_control_plane_policy.decision_trail.schema_version | Should -Be '1.0' + $releaseClient.ops_control_plane_policy.decision_trail.artifact_name_prefix | Should -Be 'release-control-plane-decision-trail' + $releaseClient.ops_control_plane_policy.decision_trail.hash_algorithm | Should -Be 'sha256' + $releaseClient.ops_control_plane_policy.decision_trail.include_state_machine | Should -BeTrue + $releaseClient.ops_control_plane_policy.decision_trail.include_rollback_orchestration | Should -BeTrue @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-monitoring' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'ops-autoremediate' @($releaseClient.ops_control_plane_policy.slo_gate.required_workflows) | Should -Contain 'release-control-plane' @@ -139,6 +144,8 @@ Describe 'Release client policy contract' { $script:policyScriptContent | Should -Match 'ops_policy_error_budget_window_days' $script:policyScriptContent | Should -Match 'ops_policy_state_machine_version' $script:policyScriptContent | Should -Match 
'ops_policy_rollback_orchestration_enabled' + $script:policyScriptContent | Should -Match 'ops_policy_decision_trail_schema_version' + $script:policyScriptContent | Should -Match 'ops_policy_decision_trail_hash_algorithm' $script:policyScriptContent | Should -Match 'ops_policy_tag_strategy_semver_only_enforce' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_full_cycle_weekday_monday' $script:policyScriptContent | Should -Match 'ops_policy_stable_window_reason_pattern_exists' diff --git a/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 b/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 new file mode 100644 index 0000000..e6386c4 --- /dev/null +++ b/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 @@ -0,0 +1,36 @@ +#Requires -Version 7.0 +#Requires -Modules Pester + +$ErrorActionPreference = 'Stop' + +Describe 'Release control plane decision trail contract' { + BeforeAll { + $script:repoRoot = (Resolve-Path -Path (Join-Path $PSScriptRoot '..')).Path + $script:scriptPath = Join-Path $script:repoRoot 'scripts/Write-ReleaseControlPlaneDecisionTrail.ps1' + + if (-not (Test-Path -LiteralPath $script:scriptPath -PathType Leaf)) { + throw "Decision trail script missing: $script:scriptPath" + } + + $script:scriptContent = Get-Content -LiteralPath $script:scriptPath -Raw + } + + It 'writes deterministic decision-trail evidence from control-plane report' { + $script:scriptContent | Should -Match 'control_plane_report_missing' + $script:scriptContent | Should -Match 'Get-FileHash' + $script:scriptContent | Should -Match 'decision_evidence' + $script:scriptContent | Should -Match 'state_machine' + $script:scriptContent | Should -Match 'rollback_orchestration' + $script:scriptContent | Should -Match 'stable_window_decision' + $script:scriptContent | Should -Match 'signature' + $script:scriptContent | Should -Match 'fingerprint' + $script:scriptContent | Should -Match 'Write-WorkflowOpsReport' + } + + It 'has parse-safe PowerShell 
syntax' { + $tokens = $null + $errors = $null + [void][System.Management.Automation.Language.Parser]::ParseInput($script:scriptContent, [ref]$tokens, [ref]$errors) + @($errors).Count | Should -Be 0 + } +} diff --git a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 index bfb0bca..6fc2345 100644 --- a/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneWorkflowContract.Tests.ps1 @@ -42,8 +42,10 @@ Describe 'Release control plane workflow contract' { $script:workflowContent | Should -Match 'RUNNER_ENVIRONMENT' $script:workflowContent | Should -Match 'hosted_runner_required' $script:workflowContent | Should -Match 'Invoke-ReleaseControlPlane\.ps1' + $script:workflowContent | Should -Match 'Write-ReleaseControlPlaneDecisionTrail\.ps1' $script:workflowContent | Should -Match 'Invoke-OpsIncidentLifecycle\.ps1' $script:workflowContent | Should -Match 'release-control-plane-report\.json' + $script:workflowContent | Should -Match 'release-control-plane-decision-trail\.json' $script:workflowContent | Should -Match 'release-control-plane-override-audit\.json' $script:workflowContent | Should -Match 'Release Control Plane Stable Override Alert' $script:workflowContent | Should -Match 'Release Control Plane Alert' diff --git a/workspace-governance-payload/workspace-governance/workspace-governance.json b/workspace-governance-payload/workspace-governance/workspace-governance.json index 48178a7..27e064d 100644 --- a/workspace-governance-payload/workspace-governance/workspace-governance.json +++ b/workspace-governance-payload/workspace-governance/workspace-governance.json @@ -327,6 +327,13 @@ ], "run_on_dry_run": false }, + "decision_trail": { + "schema_version": "1.0", + "artifact_name_prefix": "release-control-plane-decision-trail", + "hash_algorithm": "sha256", + "include_state_machine": true, + "include_rollback_orchestration": true + }, "incident_lifecycle": { "auto_close_on_recovery": 
true, "reopen_on_regression": true, diff --git a/workspace-governance.json b/workspace-governance.json index 48178a7..27e064d 100644 --- a/workspace-governance.json +++ b/workspace-governance.json @@ -327,6 +327,13 @@ ], "run_on_dry_run": false }, + "decision_trail": { + "schema_version": "1.0", + "artifact_name_prefix": "release-control-plane-decision-trail", + "hash_algorithm": "sha256", + "include_state_machine": true, + "include_rollback_orchestration": true + }, "incident_lifecycle": { "auto_close_on_recovery": true, "reopen_on_regression": true, From b8c90d39553e33bf391a95313e416dc845057430 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 03:09:24 -0800 Subject: [PATCH 58/60] fix(control-plane): tolerate optional stable-window decision fields --- ...Write-ReleaseControlPlaneDecisionTrail.ps1 | 30 +++++++++++-- ...ontrolPlaneDecisionTrailContract.Tests.ps1 | 45 +++++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 b/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 index 5cbdd7a..f6ab963 100644 --- a/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 +++ b/scripts/Write-ReleaseControlPlaneDecisionTrail.ps1 @@ -46,6 +46,25 @@ function Get-Sha256HexFromText { } } +function Get-OptionalPropertyValue { + param( + [Parameter()][object]$Object, + [Parameter(Mandatory = $true)][string]$Name, + [Parameter()][object]$Default = $null + ) + + if ($null -eq $Object) { + return $Default + } + + $property = $Object.PSObject.Properties[$Name] + if ($null -eq $property) { + return $Default + } + + return $property.Value +} + if (-not (Test-Path -LiteralPath $ReportPath -PathType Leaf)) { throw "control_plane_report_missing: $ReportPath" } @@ -106,6 +125,9 @@ $executionSummaries = @( } ) +$stablePromotionWindow = Get-OptionalPropertyValue -Object $report -Name 'stable_promotion_window' -Default $null +$stableWindowDecision = Get-OptionalPropertyValue -Object 
$stablePromotionWindow -Name 'decision' -Default $null + $decisionTrail = [ordered]@{ schema_version = '1.0' generated_at_utc = Get-UtcNowIso @@ -132,10 +154,10 @@ $decisionTrail = [ordered]@{ state_machine = $stateMachine rollback_orchestration = $rollbackOrchestration stable_window_decision = [ordered]@{ - status = [string]$report.stable_promotion_window.decision.status - reason_code = [string]$report.stable_promotion_window.decision.reason_code - can_promote = [bool]$report.stable_promotion_window.decision.can_promote - current_utc_weekday = [string]$report.stable_promotion_window.decision.current_utc_weekday + status = [string](Get-OptionalPropertyValue -Object $stableWindowDecision -Name 'status' -Default '') + reason_code = [string](Get-OptionalPropertyValue -Object $stableWindowDecision -Name 'reason_code' -Default '') + can_promote = [bool](Get-OptionalPropertyValue -Object $stableWindowDecision -Name 'can_promote' -Default $false) + current_utc_weekday = [string](Get-OptionalPropertyValue -Object $stableWindowDecision -Name 'current_utc_weekday' -Default '') } executions = @($executionSummaries) } diff --git a/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 b/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 index e6386c4..a310ae8 100644 --- a/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 +++ b/tests/ReleaseControlPlaneDecisionTrailContract.Tests.ps1 @@ -33,4 +33,49 @@ Describe 'Release control plane decision trail contract' { [void][System.Management.Automation.Language.Parser]::ParseInput($script:scriptContent, [ref]$tokens, [ref]$errors) @($errors).Count | Should -Be 0 } + + It 'handles missing optional stable-window decision fields in validate mode' { + $tempRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("decision-trail-contract-" + [Guid]::NewGuid().ToString('N')) + New-Item -Path $tempRoot -ItemType Directory -Force | Out-Null + try { + $reportPath = Join-Path $tempRoot 'release-control-plane-report.json' + $trailPath 
= Join-Path $tempRoot 'release-control-plane-decision-trail.json' + $report = [ordered]@{ + schema_version = '1.0' + timestamp_utc = '2026-02-27T11:07:04.0000000Z' + repository = 'LabVIEW-Community-CI-CD/labview-cdev-surface-fork' + branch = 'main' + mode = 'Validate' + dry_run = $true + control_plane_policy_schema_version = '2.0' + control_plane_policy_source = 'workspace_governance' + status = 'pass' + reason_code = 'validate_dry_run' + message = 'ok' + state_machine = $null + rollback_orchestration = $null + stable_promotion_window = [ordered]@{ + decision = [ordered]@{ + status = 'skipped' + reason_code = 'not_full_cycle_mode' + } + } + executions = @() + } + + $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 + & pwsh -NoProfile -File $script:scriptPath -ReportPath $reportPath -OutputPath $trailPath | Out-Null + + Test-Path -LiteralPath $trailPath -PathType Leaf | Should -BeTrue + $trail = Get-Content -LiteralPath $trailPath -Raw | ConvertFrom-Json -Depth 20 + $trail.decision_evidence.stable_window_decision.status | Should -Be 'skipped' + $trail.decision_evidence.stable_window_decision.reason_code | Should -Be 'not_full_cycle_mode' + $trail.decision_evidence.stable_window_decision.can_promote | Should -BeFalse + $trail.decision_evidence.stable_window_decision.current_utc_weekday | Should -Be '' + } finally { + if (Test-Path -LiteralPath $tempRoot -PathType Container) { + Remove-Item -LiteralPath $tempRoot -Recurse -Force + } + } + } } From 1859a38c6739acf9d28a2399373685bf8a93d1a6 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 03:25:52 -0800 Subject: [PATCH 59/60] feat(drill): add deterministic forced control-plane timeout mode --- .github/workflows/release-race-hardening-drill.yml | 12 ++++++++++++ scripts/Invoke-ReleaseRaceHardeningDrill.ps1 | 7 +++++++ ...leaseRaceHardeningDrillWorkflowContract.Tests.ps1 | 2 ++ 3 files changed, 21 insertions(+) diff --git 
a/.github/workflows/release-race-hardening-drill.yml b/.github/workflows/release-race-hardening-drill.yml index e79a442..1c79c57 100644 --- a/.github/workflows/release-race-hardening-drill.yml +++ b/.github/workflows/release-race-hardening-drill.yml @@ -20,6 +20,11 @@ on: required: false default: '120' type: string + force_control_plane_watch_timeout: + description: Force a deterministic control-plane watch-timeout failure for incident-path drills. + required: false + default: false + type: boolean permissions: contents: read @@ -77,12 +82,19 @@ jobs: $autoRemediate = [System.Convert]::ToBoolean($autoRemediateText) } + $forceControlPlaneWatchTimeoutText = [string]'${{ inputs.force_control_plane_watch_timeout }}' + $forceControlPlaneWatchTimeout = $false + if (-not [string]::IsNullOrWhiteSpace($forceControlPlaneWatchTimeoutText)) { + $forceControlPlaneWatchTimeout = [System.Convert]::ToBoolean($forceControlPlaneWatchTimeoutText) + } + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseRaceHardeningDrill.ps1 ` -Repository '${{ github.repository }}' ` -Branch 'main' ` -AutoRemediate:$autoRemediate ` -KeepLatestCanaryN $keepLatestCanaryN ` -WatchTimeoutMinutes $watchTimeoutMinutes ` + -ForceControlPlaneWatchTimeout:$forceControlPlaneWatchTimeout ` -OutputPath $reportPath - name: Upload release race-hardening drill report diff --git a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 index 813d5dd..a5997df 100644 --- a/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 +++ b/scripts/Invoke-ReleaseRaceHardeningDrill.ps1 @@ -32,6 +32,9 @@ param( [Parameter()] [bool]$AutoRemediate = $true, + [Parameter()] + [bool]$ForceControlPlaneWatchTimeout = $false, + [Parameter()] [string]$OutputPath = '' ) @@ -337,6 +340,7 @@ $report = [ordered]@{ release_limit = $ReleaseLimit watch_timeout_minutes = $WatchTimeoutMinutes auto_remediate = [bool]$AutoRemediate + force_control_plane_watch_timeout = [bool]$ForceControlPlaneWatchTimeout 
keep_latest_canary_n = $KeepLatestCanaryN predicted_canary_tag = '' predicted_canary_core = '' @@ -449,6 +453,9 @@ try { inputs = @($controlPlaneDispatch.inputs | ForEach-Object { [string]$_ }) timestamp_utc = [string]$controlPlaneDispatch.timestamp_utc } + if ([bool]$ForceControlPlaneWatchTimeout) { + throw "control_plane_watch_timeout: injected_for_drill run_id=$controlPlaneRunId timeout_minutes=$WatchTimeoutMinutes" + } $contenderDispatchedAt = [DateTimeOffset]::MinValue $controlPlaneDispatchedAt = [DateTimeOffset]::MinValue diff --git a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 index 5c403f2..1880a0c 100644 --- a/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 +++ b/tests/ReleaseRaceHardeningDrillWorkflowContract.Tests.ps1 @@ -25,6 +25,7 @@ Describe 'Release race-hardening drill workflow contract' { $script:workflowContent | Should -Match 'auto_remediate' $script:workflowContent | Should -Match 'keep_latest_canary_n' $script:workflowContent | Should -Match 'watch_timeout_minutes' + $script:workflowContent | Should -Match 'force_control_plane_watch_timeout' } It 'runs on hosted runner, executes drill runtime, and uploads drill + weekly summary artifacts' { @@ -52,6 +53,7 @@ Describe 'Release race-hardening drill workflow contract' { $script:runtimeContent | Should -Match 'contender_dispatch_report_invalid' $script:runtimeContent | Should -Match 'control_plane_dispatch_report_invalid' $script:runtimeContent | Should -Match 'control_plane_watch_timeout' + $script:runtimeContent | Should -Match 'injected_for_drill' $script:runtimeContent | Should -Match 'contender_run_id' $script:runtimeContent | Should -Match 'control_plane_run_id' $script:runtimeContent | Should -Match 'tag_already_published_by_peer' From 69e65c7a697d3429d2d077921bbfe83caa0e6d76 Mon Sep 17 00:00:00 2001 From: svelderrainruiz Date: Fri, 27 Feb 2026 03:43:28 -0800 Subject: [PATCH 60/60] fix(guardrails): emit 
invalid_input report on workflow input guard failures --- .../release-guardrails-autoremediate.yml | 139 +++++++++++++----- ...sAutoRemediationWorkflowContract.Tests.ps1 | 10 ++ 2 files changed, 113 insertions(+), 36 deletions(-) diff --git a/.github/workflows/release-guardrails-autoremediate.yml b/.github/workflows/release-guardrails-autoremediate.yml index 5635d37..4fbe69e 100644 --- a/.github/workflows/release-guardrails-autoremediate.yml +++ b/.github/workflows/release-guardrails-autoremediate.yml @@ -57,50 +57,117 @@ jobs: $ErrorActionPreference = 'Stop' $reportPath = Join-Path $env:RUNNER_TEMP 'release-guardrails-autoremediate-report.json' - $raceGateMaxAgeText = [string]'${{ inputs.race_gate_max_age_hours }}' - $raceGateMaxAgeHours = 168 - if (-not [string]::IsNullOrWhiteSpace($raceGateMaxAgeText)) { - $parsedRaceGateMaxAge = 0 - if (-not [int]::TryParse($raceGateMaxAgeText, [ref]$parsedRaceGateMaxAge)) { - throw "race_gate_max_age_hours must be an integer. actual='$raceGateMaxAgeText'" + function Write-InvalidInputReport { + param( + [Parameter(Mandatory = $true)][string]$Message, + [Parameter(Mandatory = $true)][object]$InputSnapshot + ) + + $report = [ordered]@{ + schema_version = '1.0' + generated_at_utc = (Get-Date).ToUniversalTime().ToString('o') + repository = '${{ github.repository }}' + branch = 'main' + drill_workflow = 'release-race-hardening-drill.yml' + race_gate_max_age_hours = [string]$InputSnapshot.race_gate_max_age_hours + auto_self_heal = [string]$InputSnapshot.auto_self_heal + max_attempts = [string]$InputSnapshot.max_attempts + drill_watch_timeout_minutes = [string]$InputSnapshot.drill_watch_timeout_minutes + status = 'fail' + reason_code = 'invalid_input' + message = $Message + remediation_hints = @( + 'Provide race_gate_max_age_hours between 1 and 720.', + 'Provide max_attempts between 1 and 5.', + 'Provide drill_watch_timeout_minutes between 5 and 240.', + 'Provide auto_self_heal as true or false.' 
+ ) + initial_assessment = $null + remediation_attempts = @() + final_assessment = $null } - $raceGateMaxAgeHours = $parsedRaceGateMaxAge + + $report | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $reportPath -Encoding utf8 } - $maxAttemptsText = [string]'${{ inputs.max_attempts }}' - $maxAttempts = 1 - if (-not [string]::IsNullOrWhiteSpace($maxAttemptsText)) { - $parsedMaxAttempts = 0 - if (-not [int]::TryParse($maxAttemptsText, [ref]$parsedMaxAttempts)) { - throw "max_attempts must be an integer. actual='$maxAttemptsText'" - } - $maxAttempts = $parsedMaxAttempts + $inputSnapshot = [ordered]@{ + race_gate_max_age_hours = [string]'${{ inputs.race_gate_max_age_hours }}' + auto_self_heal = [string]'${{ inputs.auto_self_heal }}' + max_attempts = [string]'${{ inputs.max_attempts }}' + drill_watch_timeout_minutes = [string]'${{ inputs.drill_watch_timeout_minutes }}' } - $watchTimeoutText = [string]'${{ inputs.drill_watch_timeout_minutes }}' - $watchTimeoutMinutes = 120 - if (-not [string]::IsNullOrWhiteSpace($watchTimeoutText)) { - $parsedWatchTimeout = 0 - if (-not [int]::TryParse($watchTimeoutText, [ref]$parsedWatchTimeout)) { - throw "drill_watch_timeout_minutes must be an integer. actual='$watchTimeoutText'" + try { + $raceGateMaxAgeText = [string]$inputSnapshot.race_gate_max_age_hours + $raceGateMaxAgeHours = 168 + if (-not [string]::IsNullOrWhiteSpace($raceGateMaxAgeText)) { + $parsedRaceGateMaxAge = 0 + if (-not [int]::TryParse($raceGateMaxAgeText, [ref]$parsedRaceGateMaxAge)) { + throw "race_gate_max_age_hours must be an integer. actual='$raceGateMaxAgeText'" + } + if ($parsedRaceGateMaxAge -lt 1 -or $parsedRaceGateMaxAge -gt 720) { + throw "race_gate_max_age_hours must be between 1 and 720. 
actual='$raceGateMaxAgeText'" + } + $raceGateMaxAgeHours = $parsedRaceGateMaxAge } - $watchTimeoutMinutes = $parsedWatchTimeout - } - $autoSelfHealText = [string]'${{ inputs.auto_self_heal }}' - $autoSelfHeal = $true - if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { - $autoSelfHeal = [System.Convert]::ToBoolean($autoSelfHealText) - } + $maxAttemptsText = [string]$inputSnapshot.max_attempts + $maxAttempts = 1 + if (-not [string]::IsNullOrWhiteSpace($maxAttemptsText)) { + $parsedMaxAttempts = 0 + if (-not [int]::TryParse($maxAttemptsText, [ref]$parsedMaxAttempts)) { + throw "max_attempts must be an integer. actual='$maxAttemptsText'" + } + if ($parsedMaxAttempts -lt 1 -or $parsedMaxAttempts -gt 5) { + throw "max_attempts must be between 1 and 5. actual='$maxAttemptsText'" + } + $maxAttempts = $parsedMaxAttempts + } + + $watchTimeoutText = [string]$inputSnapshot.drill_watch_timeout_minutes + $watchTimeoutMinutes = 120 + if (-not [string]::IsNullOrWhiteSpace($watchTimeoutText)) { + $parsedWatchTimeout = 0 + if (-not [int]::TryParse($watchTimeoutText, [ref]$parsedWatchTimeout)) { + throw "drill_watch_timeout_minutes must be an integer. actual='$watchTimeoutText'" + } + if ($parsedWatchTimeout -lt 5 -or $parsedWatchTimeout -gt 240) { + throw "drill_watch_timeout_minutes must be between 5 and 240. actual='$watchTimeoutText'" + } + $watchTimeoutMinutes = $parsedWatchTimeout + } + + $autoSelfHealText = [string]$inputSnapshot.auto_self_heal + $autoSelfHeal = $true + if (-not [string]::IsNullOrWhiteSpace($autoSelfHealText)) { + try { + $autoSelfHeal = [System.Convert]::ToBoolean($autoSelfHealText) + } catch { + throw "auto_self_heal must be a boolean. 
actual='$autoSelfHealText'" + } + } - & pwsh -NoProfile -File ./scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 ` - -Repository '${{ github.repository }}' ` - -Branch 'main' ` - -RaceGateMaxAgeHours $raceGateMaxAgeHours ` - -AutoSelfHeal:$autoSelfHeal ` - -MaxAttempts $maxAttempts ` - -DrillWatchTimeoutMinutes $watchTimeoutMinutes ` - -OutputPath $reportPath + & pwsh -NoProfile -File ./scripts/Invoke-ReleaseGuardrailsSelfHealing.ps1 ` + -Repository '${{ github.repository }}' ` + -Branch 'main' ` + -RaceGateMaxAgeHours $raceGateMaxAgeHours ` + -AutoSelfHeal:$autoSelfHeal ` + -MaxAttempts $maxAttempts ` + -DrillWatchTimeoutMinutes $watchTimeoutMinutes ` + -OutputPath $reportPath + } catch { + $failureMessage = [string]$_.Exception.Message + if (-not (Test-Path -LiteralPath $reportPath -PathType Leaf) -and ( + $failureMessage -match '^race_gate_max_age_hours must be ' -or + $failureMessage -match '^max_attempts must be ' -or + $failureMessage -match '^drill_watch_timeout_minutes must be ' -or + $failureMessage -match '^auto_self_heal must be ' + )) { + Write-InvalidInputReport -Message ("invalid_input: $failureMessage") -InputSnapshot $inputSnapshot + } + + throw + } - name: Upload release guardrails auto-remediation report if: always() diff --git a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 index b021313..859fbde 100644 --- a/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 +++ b/tests/ReleaseGuardrailsAutoRemediationWorkflowContract.Tests.ps1 @@ -44,6 +44,16 @@ Describe 'Release guardrails auto-remediation workflow contract' { $script:workflowContent | Should -Match '-Mode Recover' } + It 'writes a deterministic invalid_input report when workflow input guards fail' { + $script:workflowContent | Should -Match 'Write-InvalidInputReport' + $script:workflowContent | Should -Match "reason_code = 'invalid_input'" + $script:workflowContent | Should -Match 
'invalid_input:' + $script:workflowContent | Should -Match 'race_gate_max_age_hours must be between 1 and 720' + $script:workflowContent | Should -Match 'max_attempts must be between 1 and 5' + $script:workflowContent | Should -Match 'drill_watch_timeout_minutes must be between 5 and 240' + $script:workflowContent | Should -Match 'Set-Content -LiteralPath \$reportPath' + } + It 'enforces autonomous remediation paths for branch protection and race gate freshness' { $script:runtimeContent | Should -Match 'Test-ReleaseBranchProtectionPolicy\.ps1' $script:runtimeContent | Should -Match 'Set-ReleaseBranchProtectionPolicy\.ps1'