diff --git a/src/cli/__tests__/errors.test.ts b/src/cli/__tests__/errors.test.ts index 4a8d2c1cf..93f61059e 100644 --- a/src/cli/__tests__/errors.test.ts +++ b/src/cli/__tests__/errors.test.ts @@ -2,8 +2,10 @@ import { AgentAlreadyExistsError, getErrorMessage, isChangesetInProgressError, + isEarlyValidationError, isExpiredTokenError, isNoCredentialsError, + isReviewInProgressError, isStackInProgressError, } from '../errors.js'; import { describe, expect, it } from 'vitest'; @@ -204,4 +206,57 @@ describe('errors', () => { expect(isChangesetInProgressError({})).toBe(false); }); }); + + describe('isEarlyValidationError', () => { + it('detects AWS::EarlyValidation::PropertyValidation messages', () => { + expect( + isEarlyValidationError( + new Error('The following hook(s)/validation failed: [AWS::EarlyValidation::PropertyValidation]') + ) + ).toBe(true); + expect(isEarlyValidationError(new Error('EarlyValidation::PropertyValidation rejected runtime'))).toBe(true); + }); + + it('detects other AWS::EarlyValidation:: hook subtypes', () => { + // Future-proofing: any hook under the EarlyValidation namespace should match. + expect(isEarlyValidationError(new Error('AWS::EarlyValidation::SomeOtherCheck failed for resource'))).toBe(true); + }); + + it('detects messages that only carry the hook(s)/validation framing', () => { + // When CFN logs the hook name with different separators, we still recognize it. + expect( + isEarlyValidationError( + new Error('The following hook(s)/validation failed: [AWS-EarlyValidation-PropertyValidation]') + ) + ).toBe(true); + }); + + it('returns false for unrelated errors', () => { + expect(isEarlyValidationError(new Error('CREATE_FAILED'))).toBe(false); + expect(isEarlyValidationError(new Error('Stack not found'))).toBe(false); + expect(isEarlyValidationError(null)).toBe(false); + }); + }); + + describe('isReviewInProgressError', () => { + it('detects REVIEW_IN_PROGRESS errors', () => { + expect(isReviewInProgressError(new Error('Stack "MyStack" is currently in REVIEW_IN_PROGRESS state.'))).toBe( + true + ); + expect(isReviewInProgressError(new Error('Stack "MyStack" is in REVIEW_IN_PROGRESS state'))).toBe(true); + expect(isReviewInProgressError(new Error('REVIEW_IN_PROGRESS state detected'))).toBe(true); + }); + + it('does not match bare REVIEW_IN_PROGRESS token in passing references', () => { + // e.g. an event-stream history message mentioning prior statuses + expect( + isReviewInProgressError(new Error('History: REVIEW_IN_PROGRESS -> CREATE_IN_PROGRESS -> CREATE_COMPLETE')) + ).toBe(false); + }); + + it('returns false for unrelated errors', () => { + expect(isReviewInProgressError(new Error('UPDATE_IN_PROGRESS'))).toBe(false); + expect(isReviewInProgressError(null)).toBe(false); + }); + }); }); diff --git a/src/cli/cloudformation/__tests__/stack-cleanup.test.ts b/src/cli/cloudformation/__tests__/stack-cleanup.test.ts new file mode 100644 index 000000000..1c5892a26 --- /dev/null +++ b/src/cli/cloudformation/__tests__/stack-cleanup.test.ts @@ -0,0 +1,292 @@ +import { recoverReviewInProgressStack } from '../stack-cleanup.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockSend, DescribeStacksCommand, ListChangeSetsCommand, DeleteStackCommand } = vi.hoisted(() => ({ + mockSend: vi.fn(), + DescribeStacksCommand: class { + constructor(public input: unknown) {} + }, + ListChangeSetsCommand: class { + constructor(public input: unknown) {} + }, + DeleteStackCommand: class { + constructor(public input: unknown) {} + }, +})); + +vi.mock('@aws-sdk/client-cloudformation', () => ({ + CloudFormationClient: class { + send = mockSend; + }, + DescribeStacksCommand, + ListChangeSetsCommand, + DeleteStackCommand, +})); + +vi.mock('../../aws', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({}), +})); + +describe('recoverReviewInProgressStack', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + function makeClient() { + return { send: mockSend } as unknown as Parameters[2] extends infer O + ? O extends { client?: infer C } + ? C + : never + : never; + } + + it('deletes stack when REVIEW_IN_PROGRESS and all change sets failed', async () => { + // describe + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + // list change sets - one FAILED + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED', ExecutionStatus: 'UNAVAILABLE' }] }); + // delete stack + mockSend.mockResolvedValueOnce({}); + // poll - stack no longer exists + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + + expect(result.deleted).toBe(true); + expect(result.changeSetCount).toBe(1); + expect(result.allChangeSetsNonExecuted).toBe(true); + + // Verify DeleteStackCommand was sent + const deleteCalls = mockSend.mock.calls.filter(c => c[0] instanceof DeleteStackCommand); + expect(deleteCalls.length).toBe(1); + }); + + it('throws when stack is not in REVIEW_IN_PROGRESS', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'CREATE_COMPLETE' }] }); + await expect(recoverReviewInProgressStack('us-east-1', 'MyStack', { client: makeClient() })).rejects.toThrow( + /expected status REVIEW_IN_PROGRESS/ + ); + }); + + it('throws when stack does not exist at all', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [] }); + await expect(recoverReviewInProgressStack('us-east-1', 'MyStack', { client: makeClient() })).rejects.toThrow( + /not found/ + ); + }); + + it('refuses to delete when a change set has been executed', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ + Summaries: [{ Status: 'CREATE_COMPLETE', ExecutionStatus: 'EXECUTE_COMPLETE' }], + }); + + await expect(recoverReviewInProgressStack('us-east-1', 'MyStack', { client: makeClient() })).rejects.toThrow( + /at least one change set has been executed/ + ); + + // Ensure delete was not called + const deleteCalls = mockSend.mock.calls.filter(c => c[0] instanceof DeleteStackCommand); + expect(deleteCalls.length).toBe(0); + }); + + it('handles paginated change set lists', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ + Summaries: [{ Status: 'FAILED' }], + NextToken: 'page-2', + }); + mockSend.mockResolvedValueOnce({ + Summaries: [{ Status: 'OBSOLETE' }], + }); + mockSend.mockResolvedValueOnce({}); // delete + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); // poll + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + expect(result.deleted).toBe(true); + expect(result.changeSetCount).toBe(2); + }); + + it('handles DELETE_FAILED during poll', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + mockSend.mockResolvedValueOnce({ + Stacks: [{ StackStatus: 'DELETE_FAILED', StackStatusReason: 'permission denied' }], + }); + + await expect( + recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }) + ).rejects.toThrow(/Failed to delete stack/); + }); + + it('proceeds with deletion when ListChangeSets returns no summaries (with warning)', async () => { + // CloudFormation may auto-purge old change sets; REVIEW_IN_PROGRESS itself + // is sufficient evidence that the stack contains no resources. + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [] }); + mockSend.mockResolvedValueOnce({}); // delete + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); + + const warnings: string[] = []; + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + onWarning: msg => warnings.push(msg), + }); + expect(result.deleted).toBe(true); + expect(result.changeSetCount).toBe(0); + expect(warnings.length).toBe(1); + expect(warnings[0]).toContain('no change sets'); + const deleteCalls = mockSend.mock.calls.filter(c => c[0] instanceof DeleteStackCommand); + expect(deleteCalls.length).toBe(1); + }); + + it('treats Status=CREATE_COMPLETE + ExecutionStatus=AVAILABLE as recoverable', async () => { + // This is the typical shape of the change set that *put* the stack into + // REVIEW_IN_PROGRESS in the first place — created successfully but never + // executed (e.g. the user is recovering from a prior early-validation + // failure flow). + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ + Summaries: [{ Status: 'CREATE_COMPLETE', ExecutionStatus: 'AVAILABLE' }], + }); + mockSend.mockResolvedValueOnce({}); // delete + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + expect(result.deleted).toBe(true); + expect(result.allChangeSetsNonExecuted).toBe(true); + }); + + it('treats ExecutionStatus=EXECUTE_FAILED as recoverable', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ + Summaries: [{ Status: 'CREATE_COMPLETE', ExecutionStatus: 'EXECUTE_FAILED' }], + }); + mockSend.mockResolvedValueOnce({}); // delete + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + expect(result.deleted).toBe(true); + }); + + it('throws a Timed out error when the deadline elapses without delete completing', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + // Subsequent describes always return DELETE_IN_PROGRESS + mockSend.mockResolvedValue({ Stacks: [{ StackStatus: 'DELETE_IN_PROGRESS' }] }); + + await expect( + recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 5, + client: makeClient(), + }) + ).rejects.toThrow(/Timed out waiting for stack/); + }); + + it('re-throws non-ValidationError errors raised by DescribeStacks during polling', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + const accessDenied = new Error('Access denied'); + accessDenied.name = 'AccessDeniedException'; + mockSend.mockRejectedValueOnce(accessDenied); + + await expect( + recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }) + ).rejects.toThrow(/Access denied/); + }); + + it('treats ValidationException (SDK v3 surface) as stack-deleted during polling', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + const validationEx = new Error('Stack with id MyStack does not exist'); + validationEx.name = 'ValidationException'; + mockSend.mockRejectedValueOnce(validationEx); + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + expect(result.deleted).toBe(true); + }); + + it('treats arbitrary "does not exist" errors as stack-deleted during polling', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + const wrappedErr = new Error('Stack with id MyStack does not exist'); + wrappedErr.name = 'CloudFormationServiceException'; + mockSend.mockRejectedValueOnce(wrappedErr); + + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + }); + expect(result.deleted).toBe(true); + }); + + it('invokes onProgress callback for each poll iteration', async () => { + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + mockSend.mockResolvedValueOnce({ Summaries: [{ Status: 'FAILED' }] }); + mockSend.mockResolvedValueOnce({}); // delete + // First poll returns DELETE_IN_PROGRESS, second poll returns "not found" + mockSend.mockResolvedValueOnce({ Stacks: [{ StackStatus: 'DELETE_IN_PROGRESS' }] }); + const validationErr = new Error('Stack does not exist'); + validationErr.name = 'ValidationError'; + mockSend.mockRejectedValueOnce(validationErr); + + const heartbeats: { stackStatus: string; elapsedMs: number }[] = []; + const result = await recoverReviewInProgressStack('us-east-1', 'MyStack', { + pollIntervalMs: 1, + timeoutMs: 1000, + client: makeClient(), + onProgress: info => heartbeats.push(info), + }); + expect(result.deleted).toBe(true); + expect(heartbeats.length).toBeGreaterThanOrEqual(2); + expect(heartbeats[0]!.stackStatus).toBe('DELETE_IN_PROGRESS'); + expect(heartbeats[heartbeats.length - 1]!.stackStatus).toBe('DELETE_COMPLETE'); + }); +}); diff --git a/src/cli/cloudformation/__tests__/stack-status.test.ts b/src/cli/cloudformation/__tests__/stack-status.test.ts index 1df83fc95..da5d22f29 100644 --- a/src/cli/cloudformation/__tests__/stack-status.test.ts +++ b/src/cli/cloudformation/__tests__/stack-status.test.ts @@ -50,7 +50,6 @@ describe('checkStackStatus', () => { 'DELETE_IN_PROGRESS', 'ROLLBACK_IN_PROGRESS', 'UPDATE_ROLLBACK_IN_PROGRESS', - 'REVIEW_IN_PROGRESS', ]; for (const status of inProgressStatuses) { @@ -59,9 +58,20 @@ describe('checkStackStatus', () => { expect(result.canDeploy, `${status} should block deploy`).toBe(false); expect(result.exists).toBe(true); expect(result.message).toContain(status); + expect(result.isRecoverableReview).toBeFalsy(); } }); + it('returns canDeploy false and isRecoverableReview true for REVIEW_IN_PROGRESS', async () => { + mockSend.mockResolvedValue({ Stacks: [{ StackStatus: 'REVIEW_IN_PROGRESS' }] }); + const result = await checkStackStatus('us-east-1', 'MyStack'); + expect(result.canDeploy).toBe(false); + expect(result.exists).toBe(true); + expect(result.status).toBe('REVIEW_IN_PROGRESS'); + expect(result.isRecoverableReview).toBe(true); + expect(result.message).toContain('--recover'); + }); + it('returns canDeploy false for failed status', async () => { const failedStatuses = ['CREATE_FAILED', 'ROLLBACK_FAILED', 'DELETE_FAILED', 'UPDATE_ROLLBACK_FAILED']; diff --git a/src/cli/cloudformation/index.ts b/src/cli/cloudformation/index.ts index 4c41c05c8..f94c2edde 100644 --- a/src/cli/cloudformation/index.ts +++ b/src/cli/cloudformation/index.ts @@ -1,5 +1,6 @@ export * from './bootstrap'; export * from './outputs'; +export * from './stack-cleanup'; export * from './stack-discovery'; export * from './stack-status'; export * from './types'; diff --git a/src/cli/cloudformation/stack-cleanup.ts b/src/cli/cloudformation/stack-cleanup.ts new file mode 100644 index 000000000..f61334f28 --- /dev/null +++ b/src/cli/cloudformation/stack-cleanup.ts @@ -0,0 +1,223 @@ +import { getCredentialProvider } from '../aws'; +import { + CloudFormationClient, + DeleteStackCommand, + DescribeStacksCommand, + ListChangeSetsCommand, +} from '@aws-sdk/client-cloudformation'; + +/** + * CloudFormation change-set `Status` values that indicate the change set + * itself never reached a successful state (so deleting the stack is safe). + * + * `CREATE_COMPLETE` is also safe **iff** the corresponding `ExecutionStatus` + * indicates the change set was never actually executed (see + * {@link NON_EXECUTED_EXECUTION_STATUSES}). The combined check is performed + * inline in {@link recoverReviewInProgressStack}. + */ +const NON_EXECUTED_CHANGE_SET_STATUSES = new Set(['FAILED', 'OBSOLETE']); + +/** + * CloudFormation change-set `ExecutionStatus` values that indicate the change + * set has not been executed against the stack. + * + * - `UNAVAILABLE` — change set creation failed; cannot be executed. + * - `AVAILABLE` — change set is ready to execute but hasn't been. + * - `EXECUTE_FAILED` — execution attempt failed; CloudFormation does not + * apply partial changes for `CHANGE_SET_TYPE=CREATE` so the stack stays + * resource-free. + * - `OBSOLETE` — superseded by another change set; will not execute. + */ +const NON_EXECUTED_EXECUTION_STATUSES = new Set(['UNAVAILABLE', 'AVAILABLE', 'EXECUTE_FAILED', 'OBSOLETE']); + +export interface RecoverReviewInProgressOptions { + /** Maximum total time to wait for stack deletion (ms). Default: 5 minutes. */ + timeoutMs?: number; + /** Polling interval between status checks (ms). Default: 5 seconds. */ + pollIntervalMs?: number; + /** + * Optional CloudFormation client override (used in tests). When omitted, a + * new client is constructed using the configured region and credentials. + */ + client?: CloudFormationClient; + /** + * Optional logger callback for non-fatal observations (e.g. "no change sets + * found, proceeding because stack is REVIEW_IN_PROGRESS"). Tests and the + * deploy flow can hook this in; defaults to a no-op so the helper is + * usable from any context. + */ + onWarning?: (message: string) => void; + /** + * Optional progress callback invoked once before every poll iteration with + * the current `StackStatus` (or `'unknown'` if the describe failed without + * a recognised stack-missing error). Use this to emit a heartbeat so users + * don't think the recovery flow has hung. Receives the elapsed time in ms + * since the helper started. + */ + onProgress?: (info: { stackStatus: string; elapsedMs: number }) => void; +} + +/** + * Returns true when the given error indicates the stack does not exist. + * Robust against AWS SDK v3 surface variations: the literal name was + * historically `ValidationError`, but some service exception subclasses + * report `ValidationException` or operation-specific names. The error + * message ("Stack with id X does not exist") is the most stable signal, + * so fall back to it. + */ +function isStackNotFoundError(err: unknown): boolean { + if (!(err instanceof Error)) return false; + if (err.name === 'ValidationError' || err.name === 'ValidationException') return true; + return /does not exist/i.test(err.message); +} + +export interface RecoverReviewInProgressResult { + /** Whether the stack was successfully deleted. */ + deleted: boolean; + /** Number of change sets that were inspected. */ + changeSetCount: number; + /** Whether all change sets were in a non-executed (recoverable) state. */ + allChangeSetsNonExecuted: boolean; +} + +/** + * Recover a CloudFormation stack stuck in `REVIEW_IN_PROGRESS`. + * + * This state happens when a `CreateChangeSet` (with `ChangeSetType=CREATE`) + * fails CloudFormation pre-validation (e.g. `AWS::EarlyValidation::PropertyValidation`). + * The resulting stack has no resources but cannot be updated; the only way + * forward is to delete it. + * + * To avoid destructive surprises, this helper refuses to delete the stack + * unless **all** of its change sets are in a non-executed terminal state + * (`FAILED` or `OBSOLETE`). If any change set has been successfully executed, + * it throws an error and asks the user to inspect the stack manually. + */ +export async function recoverReviewInProgressStack( + region: string, + stackName: string, + options: RecoverReviewInProgressOptions = {} +): Promise { + const cfn = options.client ?? new CloudFormationClient({ region, credentials: getCredentialProvider() }); + const timeoutMs = options.timeoutMs ?? 5 * 60 * 1000; + const pollIntervalMs = options.pollIntervalMs ?? 5_000; + + const startedAt = Date.now(); + + // 1. Verify the stack is actually in REVIEW_IN_PROGRESS + let describe; + try { + describe = await cfn.send(new DescribeStacksCommand({ StackName: stackName })); + } catch (err: unknown) { + if (isStackNotFoundError(err)) { + throw new Error(`Stack "${stackName}" not found in region ${region}.`); + } + throw err; + } + const stack = describe.Stacks?.[0]; + if (!stack) { + throw new Error(`Stack "${stackName}" not found in region ${region}.`); + } + if (stack.StackStatus !== 'REVIEW_IN_PROGRESS') { + throw new Error( + `Cannot auto-recover stack "${stackName}": expected status REVIEW_IN_PROGRESS but found ${stack.StackStatus}. ` + + `Use the AWS console or CLI to resolve this state manually.` + ); + } + + // 2. List change sets, paginating if necessary + const changeSetSummaries: { Status?: string; ExecutionStatus?: string }[] = []; + let nextToken: string | undefined; + do { + const resp = await cfn.send(new ListChangeSetsCommand({ StackName: stackName, NextToken: nextToken })); + if (resp.Summaries) changeSetSummaries.push(...resp.Summaries); + nextToken = resp.NextToken; + } while (nextToken); + + // 3. Refuse to delete if any change set was successfully executed. + // A change set is "non-executed" when its Status is FAILED/OBSOLETE + // *or* its ExecutionStatus is in NON_EXECUTED_EXECUTION_STATUSES (which + // covers the common `Status=CREATE_COMPLETE, ExecutionStatus=AVAILABLE` + // case that triggered REVIEW_IN_PROGRESS in the first place). + const isNonExecuted = (cs: { Status?: string; ExecutionStatus?: string }) => { + if (cs.Status && NON_EXECUTED_CHANGE_SET_STATUSES.has(cs.Status)) return true; + if (cs.ExecutionStatus && NON_EXECUTED_EXECUTION_STATUSES.has(cs.ExecutionStatus)) return true; + return false; + }; + + // Empty change-set list is a corner case: CloudFormation auto-purges old + // change sets after a while, and users can also delete them manually. The + // stack's `REVIEW_IN_PROGRESS` status is itself authoritative evidence that + // no resources have been provisioned, so we proceed with deletion but emit + // a warning so operators know the safety check could not be performed + // through change-set inspection. + if (changeSetSummaries.length === 0) { + options.onWarning?.( + `Stack "${stackName}" is in REVIEW_IN_PROGRESS but has no change sets to inspect. ` + + `Proceeding with deletion based on stack status alone (REVIEW_IN_PROGRESS implies no resources).` + ); + } else { + const allNonExecuted = changeSetSummaries.every(isNonExecuted); + if (!allNonExecuted) { + throw new Error( + `Refusing to auto-delete stack "${stackName}": at least one change set has been executed. ` + + `Inspect the stack in the AWS console before attempting recovery.` + ); + } + } + + // `Array#every` returns true for empty arrays, which matches the + // warn-and-proceed semantics of the empty-summaries branch above. + const allNonExecuted = changeSetSummaries.every(isNonExecuted); + + // 4. Delete the stack + await cfn.send(new DeleteStackCommand({ StackName: stackName })); + + // 5. Poll until the stack disappears or hits an unrecoverable state. + // Run the body once first (do/while) so a delete that completes + // synchronously — i.e. DescribeStacks immediately returns + // DELETE_COMPLETE or NotFound — is recognised even when the deadline + // has already lapsed (e.g. timeoutMs=0). For non-terminal first + // responses (DELETE_IN_PROGRESS), the loop will still time out as + // expected. + const deadline = Date.now() + timeoutMs; + do { + try { + const resp = await cfn.send(new DescribeStacksCommand({ StackName: stackName })); + const current = resp.Stacks?.[0]; + const currentStatus = current?.StackStatus ?? 'unknown'; + options.onProgress?.({ stackStatus: currentStatus, elapsedMs: Date.now() - startedAt }); + if (!current || current.StackStatus === 'DELETE_COMPLETE') { + return { + deleted: true, + changeSetCount: changeSetSummaries.length, + allChangeSetsNonExecuted: allNonExecuted, + }; + } + if (current.StackStatus === 'DELETE_FAILED') { + throw new Error( + `Failed to delete stack "${stackName}": status ${current.StackStatus} ` + + `(reason: ${current.StackStatusReason ?? 'unknown'}).` + ); + } + } catch (err: unknown) { + // Stack no longer exists — deletion succeeded. Match on both + // ValidationError / ValidationException and "does not exist" so we + // are robust against AWS SDK v3 surface variations (see + // https://github.com/aws/agentcore-cli/issues/907 reviewer notes). + if (isStackNotFoundError(err)) { + options.onProgress?.({ stackStatus: 'DELETE_COMPLETE', elapsedMs: Date.now() - startedAt }); + return { + deleted: true, + changeSetCount: changeSetSummaries.length, + allChangeSetsNonExecuted: allNonExecuted, + }; + } + throw err; + } + if (Date.now() >= deadline) break; + await new Promise(resolve => setTimeout(resolve, pollIntervalMs)); + } while (Date.now() < deadline); + + throw new Error(`Timed out waiting for stack "${stackName}" to delete after ${timeoutMs}ms.`); +} diff --git a/src/cli/cloudformation/stack-status.ts b/src/cli/cloudformation/stack-status.ts index 307d04e6b..afd628d70 100644 --- a/src/cli/cloudformation/stack-status.ts +++ b/src/cli/cloudformation/stack-status.ts @@ -13,11 +13,26 @@ const IN_PROGRESS_STATUSES = new Set([ 'UPDATE_ROLLBACK_IN_PROGRESS', 'UPDATE_COMPLETE_CLEANUP_IN_PROGRESS', 'DELETE_COMPLETE_CLEANUP_IN_PROGRESS', - 'REVIEW_IN_PROGRESS', 'IMPORT_IN_PROGRESS', 'IMPORT_ROLLBACK_IN_PROGRESS', ]); +/** + * `REVIEW_IN_PROGRESS` is special: it occurs after `CreateChangeSet` with + * `ChangeSetType=CREATE` *before* a successful execute. When the underlying + * change set fails CloudFormation early-validation, the stack is permanently + * stuck in this state with zero resources. + * + * The `isRecoverableReview` flag set below merely advertises eligibility for + * the `agentcore deploy --recover` flow; it does **not** itself prove the + * stack is empty. The real safety check (verifying that no change set has + * been executed) lives in `recoverReviewInProgressStack`, which is invoked + * by the recover flow before the stack is deleted. + * + * See: https://github.com/aws/agentcore-cli/issues/907 + */ +const RECOVERABLE_REVIEW_STATUSES = new Set(['REVIEW_IN_PROGRESS']); + /** * CloudFormation stack statuses that indicate the stack is in a failed state * and may require manual intervention. @@ -39,6 +54,12 @@ export interface StackStatusResult { status?: string; /** User-friendly message explaining why deployment is blocked */ message?: string; + /** + * True when the stack is in `REVIEW_IN_PROGRESS` and has no successfully + * executed change sets — i.e. it can be cleaned up via `DeleteStack` and the + * deploy retried. Callers may offer a `--recover` flow when this is set. + */ + isRecoverableReview?: boolean; } /** @@ -49,6 +70,7 @@ export interface StackStatusResult { * - If the stack is in a stable state (*_COMPLETE), deployment can proceed * - If the stack is in progress, deployment must wait * - If the stack is in a failed state, deployment may require manual intervention + * - If the stack is in `REVIEW_IN_PROGRESS`, it can usually be auto-recovered */ export async function checkStackStatus(region: string, stackName: string): Promise { const cfn = new CloudFormationClient({ region, credentials: getCredentialProvider() }); @@ -64,6 +86,20 @@ export async function checkStackStatus(region: string, stackName: string): Promi const status = stack.StackStatus; + // Stuck in REVIEW_IN_PROGRESS — treat as recoverable, with a clearer message + if (RECOVERABLE_REVIEW_STATUSES.has(status)) { + return { + canDeploy: false, + exists: true, + status, + isRecoverableReview: true, + message: + `Stack "${stackName}" is in ${status} state. This usually means a previous deployment ` + + `failed CloudFormation early validation (for example, an unsupported runtime in this region). ` + + `Run \`agentcore deploy --recover\` to delete the empty stack and retry.`, + }; + } + // Check if stack is in a transitional state if (IN_PROGRESS_STATUSES.has(status)) { return { diff --git a/src/cli/commands/deploy/actions.ts b/src/cli/commands/deploy/actions.ts index 0669dff44..f0fcc05f0 100644 --- a/src/cli/commands/deploy/actions.ts +++ b/src/cli/commands/deploy/actions.ts @@ -15,8 +15,9 @@ import { parsePolicyEngineOutputs, parsePolicyOutputs, parseRuntimeEndpointOutputs, + recoverReviewInProgressStack, } from '../../cloudformation'; -import { getErrorMessage } from '../../errors'; +import { getErrorMessage, isEarlyValidationError, isReviewInProgressError } from '../../errors'; import { ExecLogger } from '../../logging'; import { bootstrapEnvironment, @@ -51,6 +52,13 @@ export interface ValidatedDeployOptions { verbose?: boolean; plan?: boolean; diff?: boolean; + /** + * If set, attempt to recover stacks stuck in `REVIEW_IN_PROGRESS` (caused + * by a prior failed CloudFormation early-validation) by deleting them + * before deployment proceeds. See + * https://github.com/aws/agentcore-cli/issues/907. + */ + recover?: boolean; onProgress?: (step: string, status: 'start' | 'success' | 'error') => void; onResourceEvent?: (message: string) => void; } @@ -305,7 +313,64 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise(); + while ( + !deployabilityCheck.canDeploy && + options.recover && + deployabilityCheck.isRecoverableReview && + deployabilityCheck.blockingStack && + !recoveredStacks.has(deployabilityCheck.blockingStack) + ) { + const blockingStack = deployabilityCheck.blockingStack; + logger.log(`Recovering stack "${blockingStack}" stuck in REVIEW_IN_PROGRESS...`, 'warn'); + try { + await recoverReviewInProgressStack(target.region, blockingStack, { + onWarning: msg => logger.log(msg, 'warn'), + onProgress: ({ stackStatus, elapsedMs }) => { + // Heartbeat every poll so the deploy step doesn't look hung. + logger.log(` recovery: ${blockingStack} status=${stackStatus} (${Math.floor(elapsedMs / 1000)}s)`); + }, + }); + logger.log(`Recovered stack "${blockingStack}".`); + recoveredStacks.add(blockingStack); + deployabilityCheck = await checkStackDeployability(target.region, stackNames); + } catch (recoverErr: unknown) { + const recoverErrorMessage = getErrorMessage(recoverErr); + endStep('error', recoverErrorMessage); + logger.finalize(false); + return { + success: false, + error: `Stack recovery failed: ${recoverErrorMessage}`, + logPath: logger.getRelativeLogPath(), + }; + } + } + + // If --recover was requested but the blocking stack is not in a + // recoverable state, surface that explicitly so users don't think the + // flag silently did nothing. + if ( + !deployabilityCheck.canDeploy && + options.recover && + !deployabilityCheck.isRecoverableReview && + deployabilityCheck.blockingStack + ) { + logger.log( + `--recover requested, but stack "${deployabilityCheck.blockingStack}" is not in an auto-recoverable state. Skipping recovery.`, + 'warn' + ); + } + if (!deployabilityCheck.canDeploy) { endStep('error', deployabilityCheck.message); logger.finalize(false); @@ -685,9 +750,42 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0 ? postDeployWarnings : undefined, }; } catch (err: unknown) { - logger.log(getErrorMessage(err), 'error'); + const errorMessage = getErrorMessage(err); + logger.log(errorMessage, 'error'); logger.finalize(false); - return { success: false, error: getErrorMessage(err), logPath: logger.getRelativeLogPath() }; + + // Upgrade the user-facing message for CloudFormation early-validation + // failures (e.g. PYTHON_3_14 unavailable in target region). These leave + // the stack stuck in REVIEW_IN_PROGRESS; point the user at `--recover` + // and a known-good runtime. See + // https://github.com/aws/agentcore-cli/issues/907. + if (isEarlyValidationError(err)) { + const friendly = + `${errorMessage}\n\n` + + `CloudFormation rejected the template before any resources were created. ` + + `This is commonly caused by an unsupported runtime in this region (for example, ` + + `PYTHON_3_14 outside us-east-1 / us-west-2). ` + + `Try changing runtimeVersion to "PYTHON_3_13" in agentcore.json, or deploy in a ` + + `supported region. ` + + `Then run \`agentcore deploy --recover\` to delete the empty stack stuck in ` + + `REVIEW_IN_PROGRESS and retry.`; + return { success: false, error: friendly, logPath: logger.getRelativeLogPath() }; + } + + // If we hit a stack-stuck-in-REVIEW_IN_PROGRESS error here (e.g. retried + // deploy without --recover after a previous early-validation failure), + // point the user at the recovery flow. See + // https://github.com/aws/agentcore-cli/issues/907. + if (isReviewInProgressError(err)) { + const friendly = + `${errorMessage}\n\n` + + `The CloudFormation stack is stuck in REVIEW_IN_PROGRESS, which usually means a ` + + `prior deploy failed CloudFormation early validation. ` + + `Run \`agentcore deploy --recover\` to delete the empty stack and retry.`; + return { success: false, error: friendly, logPath: logger.getRelativeLogPath() }; + } + + return { success: false, error: errorMessage, logPath: logger.getRelativeLogPath() }; } finally { if (toolkitWrapper) { await toolkitWrapper.dispose(); diff --git a/src/cli/commands/deploy/command.tsx b/src/cli/commands/deploy/command.tsx index 5bc6e96d1..65ddd3621 100644 --- a/src/cli/commands/deploy/command.tsx +++ b/src/cli/commands/deploy/command.tsx @@ -75,6 +75,7 @@ async function handleDeployCLI(options: DeployOptions): Promise { verbose: options.verbose ?? options.diff, plan: options.plan, diff: options.diff, + recover: options.recover, onProgress, onResourceEvent, }); @@ -146,6 +147,10 @@ export const registerDeploy = (program: Command) => { .option('--json', 'Output as JSON [non-interactive]') .option('--dry-run', 'Preview deployment without deploying [non-interactive]') .option('--diff', 'Show CDK diff without deploying [non-interactive]') + .option( + '--recover', + 'Auto-delete stacks stuck in REVIEW_IN_PROGRESS before deploying (see issue #907). Combine with --yes if AWS bootstrap may also be needed. [non-interactive]' + ) .action( async (cliOptions: { target?: string; @@ -154,10 +159,18 @@ export const registerDeploy = (program: Command) => { json?: boolean; dryRun?: boolean; diff?: boolean; + recover?: boolean; }) => { try { requireProject(); - if (cliOptions.json || cliOptions.target || cliOptions.dryRun || cliOptions.yes || cliOptions.verbose) { + if ( + cliOptions.json || + cliOptions.target || + cliOptions.dryRun || + cliOptions.yes || + cliOptions.verbose || + cliOptions.recover + ) { // CLI mode - any flag triggers non-interactive mode const options = { ...cliOptions, diff --git a/src/cli/commands/deploy/types.ts b/src/cli/commands/deploy/types.ts index 44cdc7847..ddad6a96f 100644 --- a/src/cli/commands/deploy/types.ts +++ b/src/cli/commands/deploy/types.ts @@ -6,6 +6,13 @@ export interface DeployOptions { json?: boolean; plan?: boolean; diff?: boolean; + /** + * If set, attempt to recover stacks stuck in `REVIEW_IN_PROGRESS` (caused + * by a prior failed CloudFormation early-validation) by deleting them + * before deployment proceeds. See + * https://github.com/aws/agentcore-cli/issues/907. + */ + recover?: boolean; } export interface DeployResult { diff --git a/src/cli/commands/validate/__tests__/action.test.ts b/src/cli/commands/validate/__tests__/action.test.ts index d6774f252..6296c9baf 100644 --- a/src/cli/commands/validate/__tests__/action.test.ts +++ b/src/cli/commands/validate/__tests__/action.test.ts @@ -204,4 +204,180 @@ describe('handleValidate', () => { expect(result.success).toBe(false); expect(result.error).toBe('string error'); }); + + it('emits a warning when PYTHON_3_14 is used in an unsupported region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [{ name: 'agent1', runtimeVersion: 'PYTHON_3_14' }], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'eu-central-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeDefined(); + expect(result.warnings![0]).toContain('Python 3.14'); + expect(result.warnings![0]).toContain('eu-central-1'); + expect(result.warnings![0]).toContain('agent1'); + }); + + it('does not warn when PYTHON_3_14 is used in a supported region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [{ name: 'agent1', runtimeVersion: 'PYTHON_3_14' }], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'us-east-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeUndefined(); + }); + + it('does not warn for non-PYTHON_3_14 runtimes regardless of region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [{ name: 'agent1', runtimeVersion: 'PYTHON_3_13' }], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'eu-central-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeUndefined(); + }); + + it('lists multiple PYTHON_3_14 agents in a single warning', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [ + { name: 'agentA', runtimeVersion: 'PYTHON_3_14' }, + { name: 'agentB', runtimeVersion: 'PYTHON_3_14' }, + { name: 'agentC', runtimeVersion: 'PYTHON_3_13' }, + ], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([ + { name: 'a', region: 'eu-central-1', account: '111' }, + { name: 'b', region: 'us-east-1', account: '111' }, + { name: 'c', region: 'eu-central-1', account: '222' }, + ]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeDefined(); + expect(result.warnings!.length).toBe(1); + const w = result.warnings![0]; + expect(w).toContain('agentA'); + expect(w).toContain('agentB'); + expect(w).not.toContain('agentC'); + // The unsupported-regions list should contain eu-central-1 exactly once + // (deduped) and should not list us-east-1, since us-east-1 *is* supported. + // Note that us-east-1 may appear in the trailing "deploy in one of:" hint; + // we therefore only inspect the substring up to that hint. + const beforeHint = w!.split('Switch to')[0]!; + expect(beforeHint.match(/eu-central-1/g)?.length ?? 0).toBe(1); + expect(beforeHint).not.toContain('us-east-1'); + }); + + it('does not warn or crash when projectSpec has no runtimes', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ name: 'Test', version: 1, managedBy: 'CDK' as const }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'eu-central-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeUndefined(); + }); + + it('warns when an MCP runtime tool uses PYTHON_3_14 in an unsupported region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + mcpRuntimeTools: [ + { + name: 'tool-x', + compute: { host: 'AgentCoreRuntime', runtime: { pythonVersion: 'PYTHON_3_14' } }, + }, + ], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'eu-central-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeDefined(); + expect(result.warnings![0]).toContain('MCP tool "tool-x"'); + expect(result.warnings![0]).toContain('eu-central-1'); + }); + + it('warns when a gateway Lambda target uses PYTHON_3_14 in an unsupported region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + agentCoreGateways: [ + { + name: 'gw1', + targets: [ + { + name: 'tgt1', + compute: { host: 'Lambda', pythonVersion: 'PYTHON_3_14' }, + }, + ], + }, + ], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'eu-central-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeDefined(); + expect(result.warnings![0]).toContain('gateway "gw1"'); + expect(result.warnings![0]).toContain('tgt1'); + }); + + it('does not warn for non-canonical casing of a supported region', async () => { + mockFindConfigRoot.mockReturnValue('/project/agentcore'); + mockReadProjectSpec.mockResolvedValue({ + name: 'Test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [{ name: 'agent1', runtimeVersion: 'PYTHON_3_14' }], + }); + // Should not warn even though casing is non-canonical + mockReadAWSDeploymentTargets.mockResolvedValue([{ name: 'default', region: 'US-EAST-1', account: '111' }]); + mockConfigExists.mockReturnValue(false); + + const result = await handleValidate({}); + + expect(result.success).toBe(true); + expect(result.warnings).toBeUndefined(); + }); }); diff --git a/src/cli/commands/validate/action.ts b/src/cli/commands/validate/action.ts index 572f5cf7d..109d92c35 100644 --- a/src/cli/commands/validate/action.ts +++ b/src/cli/commands/validate/action.ts @@ -7,6 +7,7 @@ import { NoProjectError, findConfigRoot, } from '../../../lib'; +import { PYTHON_3_14_SUPPORTED_REGIONS } from '../../../schema'; export interface ValidateOptions { directory?: string; @@ -15,6 +16,11 @@ export interface ValidateOptions { export interface ValidateResult { success: boolean; error?: string; + /** + * Non-fatal warnings (e.g. selected runtime is not yet GA in the configured + * region). The CLI prints these but does not fail validation. + */ + warnings?: string[]; } /** @@ -36,15 +42,17 @@ export async function handleValidate(options: ValidateOptions): Promise>; try { - await configIO.readProjectSpec(); + projectSpec = await configIO.readProjectSpec(); } catch (err) { return { success: false, error: formatError(err, 'agentcore.json') }; } // Validate AWS targets (aws-targets.json) + let awsTargets: Awaited>; try { - await configIO.readAWSDeploymentTargets(); + awsTargets = await configIO.readAWSDeploymentTargets(); } catch (err) { return { success: false, error: formatError(err, 'aws-targets.json') }; } @@ -58,7 +66,118 @@ export async function handleValidate(options: ValidateOptions): Promise 0 ? { success: true, warnings } : { success: true }; +} + +interface ProjectSpecForWarnings { + runtimes?: Array<{ name: string; runtimeVersion?: string }>; + /** + * MCP runtime tools deployed as AgentCore Runtime compute. The Python + * version lives at `compute.runtime.pythonVersion`. + */ + mcpRuntimeTools?: Array<{ + name: string; + compute?: { + host?: string; + runtime?: { pythonVersion?: string }; + pythonVersion?: string; + }; + }>; + /** + * MCP gateways with Lambda or AgentCoreRuntime compute targets. Lambda + * targets carry pythonVersion directly on the compute; AgentCoreRuntime + * targets carry it under `compute.runtime.pythonVersion`. + */ + agentCoreGateways?: Array<{ + name: string; + targets?: Array<{ + name?: string; + compute?: { + host?: string; + runtime?: { pythonVersion?: string }; + pythonVersion?: string; + }; + }>; + }>; +} + +interface AwsTargetForWarnings { + region: string; +} + +/** + * Extracts the Python runtime version (if any) from a compute config object, + * regardless of host shape (Lambda's flat `pythonVersion` or AgentCoreRuntime's + * nested `runtime.pythonVersion`). + */ +function getPythonVersionFromCompute( + compute: { runtime?: { pythonVersion?: string }; pythonVersion?: string } | undefined +): string | undefined { + if (!compute) return undefined; + return compute.runtime?.pythonVersion ?? compute.pythonVersion; +} + +/** + * Returns a list of non-fatal validation warnings. Currently only checks + * Python 3.14 region availability across agents, MCP runtime tools, and + * gateway targets. + */ +export function collectRuntimeRegionWarnings( + projectSpec: ProjectSpecForWarnings, + awsTargets: readonly AwsTargetForWarnings[] +): string[] { + const warnings: string[] = []; + + // Collect every component that selected PYTHON_3_14, with a friendly label + // for the warning message. + const py314Components: string[] = []; + + for (const r of projectSpec.runtimes ?? []) { + if (r.runtimeVersion === 'PYTHON_3_14') py314Components.push(`agent "${r.name}"`); + } + + for (const tool of projectSpec.mcpRuntimeTools ?? []) { + if (getPythonVersionFromCompute(tool.compute) === 'PYTHON_3_14') { + py314Components.push(`MCP tool "${tool.name}"`); + } + } + + for (const gw of projectSpec.agentCoreGateways ?? []) { + for (const tgt of gw.targets ?? []) { + if (getPythonVersionFromCompute(tgt.compute) === 'PYTHON_3_14') { + const targetLabel = tgt.name ? `target "${tgt.name}"` : 'target'; + py314Components.push(`gateway "${gw.name}" ${targetLabel}`); + } + } + } + + if (py314Components.length === 0) return warnings; + + const unsupportedRegions = Array.from( + new Set( + awsTargets + .map(t => t.region) + // AWS regions are canonically lowercase, but harden against any + // non-canonical casing that may slip through aws-targets parsing. + .filter(region => !PYTHON_3_14_SUPPORTED_REGIONS.includes(region.toLowerCase())) + ) + ); + + if (unsupportedRegions.length === 0) return warnings; + + warnings.push( + `Component(s) [${py314Components.join(', ')}] use Python 3.14, which is not yet ` + + `available in region(s): ${unsupportedRegions.join(', ')}. ` + + `CloudFormation will reject the deployment with an early-validation error. ` + + `Switch to "PYTHON_3_13" or deploy in one of: ${PYTHON_3_14_SUPPORTED_REGIONS.join(', ')}.` + ); + + return warnings; } function formatError(err: unknown, fileName: string): string { diff --git a/src/cli/commands/validate/command.tsx b/src/cli/commands/validate/command.tsx index b4149a804..64f32ec50 100644 --- a/src/cli/commands/validate/command.tsx +++ b/src/cli/commands/validate/command.tsx @@ -1,7 +1,8 @@ import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; import { handleValidate } from './action'; import type { Command } from '@commander-js/extra-typings'; -import { Text, render } from 'ink'; +import { Box, Text, render } from 'ink'; +import React from 'react'; export const registerValidate = (program: Command) => { program @@ -12,7 +13,17 @@ export const registerValidate = (program: Command) => { const result = await handleValidate(options); if (result.success) { - render(Valid); + const warnings = result.warnings ?? []; + render( + + {warnings.map((w, idx) => ( + + Warning: {w} + + ))} + Valid + + ); process.exit(0); } else { render({result.error}); diff --git a/src/cli/errors.ts b/src/cli/errors.ts index 9e7e9397d..2e2b955b5 100644 --- a/src/cli/errors.ts +++ b/src/cli/errors.ts @@ -158,3 +158,42 @@ export function isChangesetInProgressError(err: unknown): boolean { return false; } + +/** + * Checks if an error originates from CloudFormation early validation hooks + * (e.g. `AWS::EarlyValidation::PropertyValidation`, or future hook subtypes + * under the `AWS::EarlyValidation::` namespace). These errors typically + * indicate that the template references a value the service does not support + * in the target region (for example, a Python runtime not yet GA in that + * region — see https://github.com/aws/agentcore-cli/issues/907). + */ +export function isEarlyValidationError(err: unknown): boolean { + const message = getErrorMessage(err).toLowerCase(); + // Match any `AWS::EarlyValidation::*` hook (PropertyValidation today, but + // CloudFormation may add more subtypes). The single `earlyvalidation::` + // substring is sufficient because CFN renders the namespace separator + // consistently. Also accept the older "hook(s)/validation failed" framing + // for cases where the hook name is logged with different separators. + if (message.includes('earlyvalidation::')) return true; + if (message.includes('hook(s)/validation failed') && message.includes('earlyvalidation')) return true; + return false; +} + +/** + * Checks if an error indicates the stack is stuck in `REVIEW_IN_PROGRESS`. + * This is a recoverable state — the stack contains no resources and can be + * deleted via `agentcore deploy --recover`. + * + * Matches the specific phrasings CloudFormation uses when surfacing this + * state ("is in REVIEW_IN_PROGRESS state", "currently in REVIEW_IN_PROGRESS") + * rather than a bare token, so that event-stream messages that merely + * mention `REVIEW_IN_PROGRESS` in passing (e.g. as part of a status history) + * do not trigger a misleading `--recover` hint. + */ +export function isReviewInProgressError(err: unknown): boolean { + const message = getErrorMessage(err).toLowerCase(); + if (message.includes('is in review_in_progress')) return true; + if (message.includes('currently in review_in_progress')) return true; + if (message.includes('review_in_progress state')) return true; + return false; +} diff --git a/src/cli/operations/deploy/preflight.ts b/src/cli/operations/deploy/preflight.ts index ba423a088..b252e31b4 100644 --- a/src/cli/operations/deploy/preflight.ts +++ b/src/cli/operations/deploy/preflight.ts @@ -34,6 +34,12 @@ export interface StackStatusCheckResult { blockingStack?: string; /** User-friendly message explaining why deployment is blocked */ message?: string; + /** + * True when the blocking stack is in `REVIEW_IN_PROGRESS` and can usually + * be recovered via `agentcore deploy --recover`. See + * https://github.com/aws/agentcore-cli/issues/907. + */ + isRecoverableReview?: boolean; } /** @@ -251,6 +257,7 @@ export async function checkStackDeployability(region: string, stackNames: string canDeploy: false, blockingStack: blocking.stackName, message: blocking.result.message, + isRecoverableReview: blocking.result.isRecoverableReview, }; } diff --git a/src/cli/tui/screens/mcp/types.ts b/src/cli/tui/screens/mcp/types.ts index 59be5abe7..82e8ad061 100644 --- a/src/cli/tui/screens/mcp/types.ts +++ b/src/cli/tui/screens/mcp/types.ts @@ -277,8 +277,8 @@ export const POLICY_ENGINE_MODE_OPTIONS = [ ] as const; export const PYTHON_VERSION_OPTIONS = [ - { id: 'PYTHON_3_14', title: 'Python 3.14', description: 'Latest' }, - { id: 'PYTHON_3_13', title: 'Python 3.13', description: '' }, + { id: 'PYTHON_3_13', title: 'Python 3.13', description: 'Recommended' }, + { id: 'PYTHON_3_14', title: 'Python 3.14', description: 'Preview – not available in all regions' }, { id: 'PYTHON_3_12', title: 'Python 3.12', description: '' }, { id: 'PYTHON_3_11', title: 'Python 3.11', description: '' }, { id: 'PYTHON_3_10', title: 'Python 3.10', description: '' }, diff --git a/src/schema/constants.ts b/src/schema/constants.ts index d235a0df1..d2266122b 100644 --- a/src/schema/constants.ts +++ b/src/schema/constants.ts @@ -146,8 +146,33 @@ export function isReservedProjectName(name: string): boolean { export const PythonRuntimeSchema = z.enum(['PYTHON_3_10', 'PYTHON_3_11', 'PYTHON_3_12', 'PYTHON_3_13', 'PYTHON_3_14']); export type PythonRuntime = z.infer; -/** Default Python runtime version for new agents and MCP tools */ -export const DEFAULT_PYTHON_VERSION: PythonRuntime = 'PYTHON_3_14'; +/** + * Default Python runtime version for new agents and MCP tools. + * + * NOTE: PYTHON_3_14 is intentionally NOT used as the default because it is not + * yet available in all AWS regions (only us-east-1 / us-west-2 at the time of + * writing). When users in other regions accepted the default, CloudFormation + * rejected the deploy with `AWS::EarlyValidation::PropertyValidation`, leaving + * the stack stuck in `REVIEW_IN_PROGRESS`. See + * https://github.com/aws/agentcore-cli/issues/907 for details. PYTHON_3_13 is + * GA in all AgentCore regions and is therefore the safer default. + */ +export const DEFAULT_PYTHON_VERSION: PythonRuntime = 'PYTHON_3_13'; + +/** + * AWS regions where PYTHON_3_14 is currently supported by Bedrock AgentCore / + * CloudFormation. Update as availability expands. Callers should compare + * directly with this list rather than wrap it in a helper, so the rule lives + * in one place. + * + * NOTE: When this list changes, also update the equivalent constant in the + * sibling repo `aws/agentcore-l3-cdk-constructs` (see + * `src/cdk/constructs/components/mcp/mcp-utils.ts`, + * `DEFAULT_MCP_PYTHON_VERSION`). The two repos do not currently share a + * package, so they can drift; consider extracting a shared package if a third + * region becomes supported. See https://github.com/aws/agentcore-cli/issues/907. + */ +export const PYTHON_3_14_SUPPORTED_REGIONS: readonly string[] = ['us-east-1', 'us-west-2'] as const; export const NodeRuntimeSchema = z.enum(['NODE_18', 'NODE_20', 'NODE_22']); export type NodeRuntime = z.infer;