diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b2e9775..ba84cf8 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "plugins": [ { "category": "deployment", - "description": "Deploy applications to AWS with architecture recommendations, cost estimates, and IaC deployment.", + "description": "Deploy applications to AWS with architecture recommendations, cost estimates, CDK best practices, monitoring setup, and IaC deployment.", "keywords": [ "aws", "aws agent skills", @@ -20,7 +20,9 @@ "cdk", "cloudformation", "infrastructure", - "pricing" + "pricing", + "monitoring", + "cloudwatch" ], "name": "deploy-on-aws", "source": "./plugins/deploy-on-aws", @@ -28,9 +30,10 @@ "aws", "deploy", "infrastructure", - "cdk" + "cdk", + "monitoring" ], - "version": "1.0.0" + "version": "1.1.0" } ] } diff --git a/plugins/deploy-on-aws/.claude-plugin/plugin.json b/plugins/deploy-on-aws/.claude-plugin/plugin.json index 5df6a70..f156154 100644 --- a/plugins/deploy-on-aws/.claude-plugin/plugin.json +++ b/plugins/deploy-on-aws/.claude-plugin/plugin.json @@ -2,7 +2,7 @@ "author": { "name": "Amazon Web Services" }, - "description": "Deploy applications to AWS with architecture recommendations, cost estimates, and IaC deployment.", + "description": "Deploy applications to AWS with architecture recommendations, cost estimates, CDK best practices, monitoring setup, and IaC deployment.", "homepage": "https://github.com/awslabs/agent-plugins", "keywords": [ "aws", @@ -10,10 +10,12 @@ "infrastructure", "cdk", "cloudformation", - "pricing" + "pricing", + "monitoring", + "cloudwatch" ], "license": "Apache-2.0", "name": "deploy-on-aws", "repository": "https://github.com/awslabs/agent-plugins", - "version": "1.0.0" + "version": "1.1.0" } diff --git a/plugins/deploy-on-aws/skills/deploy/SKILL.md b/plugins/deploy-on-aws/skills/deploy/SKILL.md index dfc8d77..8e85c98 100644 --- 
a/plugins/deploy-on-aws/skills/deploy/SKILL.md +++ b/plugins/deploy-on-aws/skills/deploy/SKILL.md @@ -17,14 +17,19 @@ straightforward services. Don't ask questions with obvious answers. 1. **Analyze** - Scan codebase for framework, database, dependencies 2. **Recommend** - Select AWS services, concisely explain rationale 3. **Estimate** - Show monthly cost before proceeding -4. **Generate** - Write IaC code with [security defaults](references/security.md) applied -5. **Deploy** - Run security checks, then execute with user confirmation +4. **Generate** - Write IaC code following [CDK best practices](references/cdk-best-practices.md) + with [security defaults](references/security.md) applied +5. **Validate** - Run synthesis, security scans, and + [validation script](scripts/validate-stack.sh) +6. **Deploy** - Execute with user confirmation +7. **Monitor** - Set up [monitoring](references/monitoring.md) for deployed resources ## Defaults See [defaults.md](references/defaults.md) for the complete service selection matrix. -Core principle: Default to **dev-sized** (cost-conscious: small instance sizes, minimal redundancy, and non-HA/single-AZ defaults) unless user says "production-ready". +Core principle: Default to **dev-sized** (cost-conscious: small instance sizes, minimal +redundancy, and non-HA/single-AZ defaults) unless user says "production-ready". ## MCP Servers @@ -48,13 +53,69 @@ for query patterns. Consult for IaC best practices. Use when writing CDK/CloudFormation/Terraform to ensure patterns follow AWS recommendations. 
+## CDK Best Practices + +When generating IaC (default: CDK TypeScript), follow these rules: + +- **No explicit resource names** — let CDK generate unique names +- **Use grant methods** for IAM — `table.grantReadWriteData(fn)` not raw policies +- **Use language-specific Lambda constructs** — `NodejsFunction`, `PythonFunction` +- **Prefer L2/L3 constructs** over L1 (`CfnXxx`) +- **Add cdk-nag** for automated best-practice validation + +See [cdk-best-practices.md](references/cdk-best-practices.md) for patterns and examples. + +## Pre-Deployment Validation + +Before deploying, run these checks in order: + +1. Build — ensure compilation succeeds +2. Tests — run existing test suite +3. `cdk synth` — validate synthesis (with cdk-nag if configured) +4. Security scan — `checkov` or `cfn-nag` on generated templates +5. Secret detection — scan for hardcoded credentials + +Use [validate-stack.sh](scripts/validate-stack.sh) to automate synthesis validation +and template analysis (step 3). Run `checkov` or `cfn-nag` separately for step 4. + +## Error Handling + +### Validation Failures + +If `cdk synth` or the validation script fails: + +- Show the error output to the user +- Identify and fix the issue in generated code +- Re-run validation before proceeding to deploy +- DO NOT deploy with failing validation + +### Deployment Failures + +If `cdk deploy` fails: + +- Show the CloudFormation error event +- Suggest fix based on error type +- Stack will auto-rollback — no manual cleanup needed + +## Post-Deployment Monitoring + +After successful deployment, set up monitoring appropriate to the environment: + +- **Dev**: Basic error alerting (Lambda errors, Fargate task failures) +- **Production**: Full observability (alarms, dashboards, structured logging) + +See [monitoring.md](references/monitoring.md) for CloudWatch alarm patterns by service.
+ ## Principles - Concisely explain why each service was chosen - Always show cost estimate before generating code -- Apply [security defaults](references/security.md) automatically (encryption, private subnets, least privilege) +- Apply [security defaults](references/security.md) automatically (encryption, + private subnets, least privilege) +- Follow [CDK best practices](references/cdk-best-practices.md) when generating IaC - Run IaC security scans (cfn-nag, checkov) before deployment -- Don't ask "Lambda or Fargate?" - just pick the obvious one +- Set up [monitoring](references/monitoring.md) after deployment +- Don't ask "Lambda or Fargate?" — just pick the obvious one - If genuinely ambiguous, then ask ## References @@ -62,3 +123,6 @@ to ensure patterns follow AWS recommendations. - [Service defaults](references/defaults.md) - [Security defaults](references/security.md) - [Cost estimation patterns](references/cost-estimation.md) +- [CDK best practices](references/cdk-best-practices.md) +- [Monitoring and observability](references/monitoring.md) +- [Validation script](scripts/validate-stack.sh) diff --git a/plugins/deploy-on-aws/skills/deploy/references/cdk-best-practices.md b/plugins/deploy-on-aws/skills/deploy/references/cdk-best-practices.md new file mode 100644 index 0000000..b952a9d --- /dev/null +++ b/plugins/deploy-on-aws/skills/deploy/references/cdk-best-practices.md @@ -0,0 +1,63 @@ +# CDK Best Practices + +Patterns for generating CDK IaC in the deploy workflow. + +## Resource Naming + +**DO NOT** explicitly specify resource names. Let CDK generate unique names: + +```typescript +// ✅ Let CDK generate: StackName-MyFunctionXXXXXX +new lambda.Function(this, 'MyFunction', { /* no functionName */ }); +``` + +**Why**: Enables reusable patterns, parallel deployments, and stack isolation. 
+ +## Lambda Constructs + +Use language-specific constructs for automatic bundling: + +- **TypeScript**: `NodejsFunction` from `aws-cdk-lib/aws-lambda-nodejs` +- **Python**: `PythonFunction` from `@aws-cdk/aws-lambda-python-alpha` + +Benefits: Automatic dependency resolution, transpilation, and packaging. + +## IAM Permissions + +Use grant methods instead of raw policies: + +```typescript +table.grantReadWriteData(handler); // ✅ +// NOT: handler.addToRolePolicy({ actions: ['dynamodb:*'], resources: ['*'] }) +``` + +## Construct Levels + +Prefer L3 (`LambdaRestApi`) > L2 (`Function`) > L1 (`CfnFunction`). + +## Validation + +1. Add **cdk-nag** for automated best-practice checks during synthesis +2. Run `cdk synth` to validate +3. Suppress findings with documented reasons via `NagSuppressions` + +## Testing + +- **Snapshot tests**: `expect(template.toJSON()).toMatchSnapshot()` +- **Assertions**: `template.hasResourceProperties('AWS::Lambda::Function', { ... })` + +## Stack Organization + +- Split at ~200 resources per stack +- Separate stateful (DB, S3) from stateless (compute) resources +- Export values via `CfnOutput` for cross-stack references + +## Anti-Patterns + +| Anti-Pattern | Fix | +| ----------------------------- | --------------------------------------- | +| Hardcoded resource names | Let CDK generate names | +| `actions: ['*']` in IAM | Use grant methods | +| Manual Lambda bundling | Use `NodejsFunction` / `PythonFunction` | +| Missing environment variables | Pass via `environment` prop | +| No stack outputs | Add `CfnOutput` for API URLs, ARNs | diff --git a/plugins/deploy-on-aws/skills/deploy/references/monitoring.md b/plugins/deploy-on-aws/skills/deploy/references/monitoring.md new file mode 100644 index 0000000..a73bfd8 --- /dev/null +++ b/plugins/deploy-on-aws/skills/deploy/references/monitoring.md @@ -0,0 +1,69 @@ +# Monitoring and Observability + +Post-deployment monitoring patterns. Set up after successful deployment. 
+ +## When to Add Monitoring + +- **Always**: Error alerting for deployed compute (Fargate, Lambda) +- **Production**: Full observability (alarms + dashboards + logs) +- **Dev**: Basic error alerting only + +## Lambda Alarms + +| Metric | Threshold | Periods | +| --------------- | -------------- | ------- | +| Errors (Sum) | 10 per 5 min | 1 | +| Duration (Max) | 80% of timeout | 2 | +| Throttles (Sum) | 5 per 5 min | 1 | + +## ECS/Fargate Alarms + +| Metric | Threshold | Periods | +| ---------------------- | ------------- | ------- | +| CPU Utilization | 80% | 3 | +| Memory Utilization | 85% | 2 | +| Running Task Count < 1 | 1 (less-than) | 2 | + +## ALB Alarms + +| Metric | Threshold | Periods | +| -------------------- | ------------ | ------- | +| 5XX Error Count | 10 per 5 min | 1 | +| Unhealthy Host Count | 1 | 2 | +| Response Time p99 | 1 second | 2 | + +## RDS/Aurora Alarms + +| Metric | Threshold | Periods | +| -------------------- | ---------- | ------- | +| CPU Utilization | 80% | 3 | +| Free Storage Space | < 10 GB | 1 | +| Database Connections | 80% of max | 2 | + +## Alarm Notification + +Use SNS topic with email subscription for alarm actions: + +```typescript +const topic = new sns.Topic(this, 'AlarmTopic'); +topic.addSubscription(new subscriptions.EmailSubscription('ops@example.com')); +alarm.addAlarmAction(new actions.SnsAction(topic)); +``` + +## Threshold Guidelines + +| Category | Warning | Critical | +| ----------- | ------------ | ----------- | +| CPU/Memory | 70-80% | 80-90% | +| Error rate | Based on SLA | 2× warning | +| Latency p99 | 80% of SLA | 100% of SLA | +| Storage | 70% used | 85% used | + +## Production Dashboard + +Include these widget groups: + +1. **Service Overview**: Request rate, error %, latency (p50/p95/p99) +2. **Resource Utilization**: CPU, memory, network by service +3. **Cost Metrics**: Daily spend, month-to-date +4. 
**Errors**: Error counts by type, recent logs
diff --git a/plugins/deploy-on-aws/skills/deploy/scripts/validate-stack.sh b/plugins/deploy-on-aws/skills/deploy/scripts/validate-stack.sh
new file mode 100755
index 0000000..90faff5
--- /dev/null
+++ b/plugins/deploy-on-aws/skills/deploy/scripts/validate-stack.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+
+# CDK Stack Validation Script for deploy-on-aws
+#
+# Pre-deployment validation of synthesized CDK stacks.
+# Checks synthesis success, template size, resource counts,
+# and cdk-nag integration.
+#
+# Usage: ./scripts/validate-stack.sh [project-root]
+#
+#   project-root  CDK app directory (default: current directory).
+#                 Synthesis is run from this directory.
+#
+# Exit status: 0 when no check reported an error (warnings are allowed),
+#              1 otherwise.
+
+set -euo pipefail
+
+PROJECT_ROOT="${1:-$(pwd)}"
+
+# CloudFormation hard quotas (reported as errors) and the softer thresholds
+# that only produce warnings.
+readonly MAX_TEMPLATE_BYTES=1048576 # template body stored in S3 (1 MB)
+readonly WARN_TEMPLATE_BYTES=51200  # template body passed inline in a request
+readonly MAX_RESOURCES=500          # resources per template
+readonly WARN_RESOURCES=200         # point at which splitting is suggested
+
+echo "🔍 CDK Stack Validation"
+echo "========================"
+echo ""
+
+readonly RED='\033[0;31m'
+readonly GREEN='\033[0;32m'
+readonly YELLOW='\033[1;33m'
+readonly NC='\033[0m'
+
+VALIDATION_PASSED=true
+
+# Report helpers. Colour codes are expanded via %b; the message is printed
+# via %s so backslashes in paths stay literal.
+success() { printf '%b %s\n' "${GREEN}✓${NC}" "$1"; }
+error() {
+  printf '%b %s\n' "${RED}✗${NC}" "$1"
+  VALIDATION_PASSED=false
+}
+warning() { printf '%b %s\n' "${YELLOW}⚠${NC}" "$1"; }
+info() { printf 'ℹ %s\n' "$1"; }
+
+# Work from the project root so that `cdk synth` picks up this project's
+# cdk.json and writes cdk.out where the template check below looks for it.
+if ! cd -- "${PROJECT_ROOT}" 2> /dev/null; then
+  error "Project root is not an accessible directory: ${PROJECT_ROOT}"
+  exit 1
+fi
+PROJECT_ROOT="$(pwd)"
+
+# Check CDK CLI
+if ! command -v cdk &> /dev/null; then
+  error "AWS CDK CLI not found. Install with: npm install -g aws-cdk"
+  exit 1
+fi
+success "AWS CDK CLI found ($(cdk --version 2> /dev/null | head -1))"
+
+# Detect language
+CDK_LANG="unknown"
+if [[ -f "${PROJECT_ROOT}/package.json" ]]; then
+  CDK_LANG="typescript"
+  info "Detected TypeScript/JavaScript CDK project"
+elif [[ -f "${PROJECT_ROOT}/requirements.txt" ]] \
+  || [[ -f "${PROJECT_ROOT}/setup.py" ]] \
+  || [[ -f "${PROJECT_ROOT}/pyproject.toml" ]]; then
+  CDK_LANG="python"
+  info "Detected Python CDK project"
+elif [[ -f "${PROJECT_ROOT}/pom.xml" ]]; then
+  CDK_LANG="java"
+  info "Detected Java CDK project"
+elif [[ -f "${PROJECT_ROOT}/go.mod" ]]; then
+  CDK_LANG="go"
+  info "Detected Go CDK project"
+else
+  warning "Could not detect CDK project language"
+fi
+
+# Run synthesis (uses app command from cdk.json). The output is captured so
+# that a failure can be shown to the user instead of being discarded.
+echo ""
+info "Running CDK synthesis..."
+if SYNTH_OUTPUT="$(cdk synth --quiet 2>&1)"; then
+  success "CDK synthesis successful"
+else
+  error "CDK synthesis failed — cdk output follows"
+  printf '%s\n' "${SYNTH_OUTPUT}" >&2
+  exit 1
+fi
+
+# Check cdk-nag integration
+echo ""
+info "Checking cdk-nag integration..."
+case "${CDK_LANG}" in
+  typescript)
+    if grep -qs -- "cdk-nag" "${PROJECT_ROOT}/package.json"; then
+      success "cdk-nag found in package.json"
+    else
+      warning "cdk-nag not found — recommended: npm install --save-dev cdk-nag"
+    fi
+    ;;
+  python)
+    # Any manifest used for language detection may declare cdk-nag.
+    NAG_MANIFEST=""
+    for manifest in requirements.txt setup.py pyproject.toml; do
+      if grep -qs -- "cdk-nag" "${PROJECT_ROOT}/${manifest}"; then
+        NAG_MANIFEST="${manifest}"
+        break
+      fi
+    done
+    if [[ -n "${NAG_MANIFEST}" ]]; then
+      success "cdk-nag found in ${NAG_MANIFEST}"
+    else
+      warning "cdk-nag not found — recommended: pip install cdk-nag"
+    fi
+    ;;
+  *)
+    info "cdk-nag check not implemented for ${CDK_LANG} projects — skipped"
+    ;;
+esac
+
+# Validate synthesized templates
+echo ""
+info "Checking synthesized templates..."
+
+# Collect paths NUL-delimited so directories containing spaces survive.
+TEMPLATES=()
+if [[ -d "${PROJECT_ROOT}/cdk.out" ]]; then
+  while IFS= read -r -d '' template; do
+    TEMPLATES+=("${template}")
+  done < <(find "${PROJECT_ROOT}/cdk.out" -type f -name "*.template.json" -print0)
+fi
+
+if ((${#TEMPLATES[@]} == 0)); then
+  error "No CloudFormation templates found in cdk.out/"
+  exit 1
+fi
+success "Found ${#TEMPLATES[@]} template(s)"
+
+# Resolve jq availability once rather than per template.
+HAVE_JQ=false
+if command -v jq &> /dev/null; then
+  HAVE_JQ=true
+fi
+
+for template in "${TEMPLATES[@]}"; do
+  STACK_NAME="$(basename "${template}" .template.json)"
+  # Arithmetic expansion strips the padding some wc implementations emit.
+  TEMPLATE_SIZE=$(( $(wc -c < "${template}") ))
+
+  if ((TEMPLATE_SIZE > MAX_TEMPLATE_BYTES)); then
+    error "${STACK_NAME}: Template ${TEMPLATE_SIZE} bytes (exceeds CloudFormation limit of ${MAX_TEMPLATE_BYTES})"
+  elif ((TEMPLATE_SIZE > WARN_TEMPLATE_BYTES)); then
+    warning "${STACK_NAME}: Template ${TEMPLATE_SIZE} bytes (large — consider nested stacks)"
+  fi
+
+  if [[ "${HAVE_JQ}" == true ]]; then
+    # A template jq cannot read is an error, not "0 resources".
+    if ! RESOURCE_COUNT="$(jq '.Resources | length' "${template}" 2> /dev/null)" \
+      || [[ ! "${RESOURCE_COUNT}" =~ ^[0-9]+$ ]]; then
+      error "${STACK_NAME}: template could not be parsed as JSON"
+    elif ((RESOURCE_COUNT > MAX_RESOURCES)); then
+      error "${STACK_NAME}: ${RESOURCE_COUNT} resources (exceeds CloudFormation limit of ${MAX_RESOURCES})"
+    elif ((RESOURCE_COUNT > WARN_RESOURCES)); then
+      warning "${STACK_NAME}: ${RESOURCE_COUNT} resources (consider splitting)"
+    else
+      success "${STACK_NAME}: ${RESOURCE_COUNT} resources"
+    fi
+  elif ((TEMPLATE_SIZE <= MAX_TEMPLATE_BYTES)); then
+    success "${STACK_NAME}: template OK (${TEMPLATE_SIZE} bytes)"
+  fi
+done
+
+# Summary
+echo ""
+echo "========================"
+if [[ "${VALIDATION_PASSED}" == true ]]; then
+  printf '%b\n' "${GREEN}✓ Validation passed${NC}"
+  exit 0
+else
+  printf '%b\n' "${RED}✗ Validation failed${NC}"
+  exit 1
+fi