diff --git a/.github/workflows/deploy_dev.yml b/.github/workflows/deploy_dev_full.yml similarity index 97% rename from .github/workflows/deploy_dev.yml rename to .github/workflows/deploy_dev_full.yml index 36e1a969..d9c169e0 100644 --- a/.github/workflows/deploy_dev.yml +++ b/.github/workflows/deploy_dev_full.yml @@ -1,4 +1,4 @@ -name: Dev Deploy +name: Dev Deploy Infra and Lambda on: workflow_dispatch: diff --git a/.github/workflows/deploy_dev_lambda_only.yml b/.github/workflows/deploy_dev_lambda_only.yml new file mode 100644 index 00000000..82f2470b --- /dev/null +++ b/.github/workflows/deploy_dev_lambda_only.yml @@ -0,0 +1,49 @@ +name: Dev Deploy Lambda Only +# This allows us to deploy only lambda changes to dev without touching infra. + +on: + workflow_dispatch: + +permissions: + id-token: write + contents: write + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + lambdas_dirs: ${{ steps.lambdas_dirs.outputs.just_outputs }} + steps: + - uses: actions/checkout@v4 + + - name: Get lambdas Directories + id: lambdas_dirs + uses: chrispsheehan/just-aws-oidc-action@0.3.0 + with: + just_action: lambda-get-directories + + build: + uses: ./.github/workflows/build.yml + needs: + - setup + with: + environment: dev + version: ${{ github.sha }} + matrix: ${{ needs.setup.outputs.lambdas_dirs }} + + get_build: + needs: build + uses: ./.github/workflows/build_get.yml + with: + environment: dev + version: ${{ github.sha }} + + deploy: + uses: ./.github/workflows/deploy.yml + needs: + - get_build + with: + environment: dev + lambda_version: ${{ needs.get_build.outputs.lambda_version }} + lambda_bucket: ${{ needs.get_build.outputs.lambda_bucket }} + matrix: ${{ needs.get_build.outputs.lambda_version_files }} diff --git a/.github/workflows/deploy_prod.yml b/.github/workflows/deploy_prod_full.yml similarity index 96% rename from .github/workflows/deploy_prod.yml rename to .github/workflows/deploy_prod_full.yml index bd181768..70db1854 100644 --- 
a/.github/workflows/deploy_prod.yml
+++ b/.github/workflows/deploy_prod_full.yml
@@ -1,4 +1,4 @@
-name: Prod Deploy
+name: Prod Deploy Infra and Lambda
 
 on:
   workflow_dispatch:
diff --git a/.github/workflows/deploy_prod_lambda_only.yml b/.github/workflows/deploy_prod_lambda_only.yml
new file mode 100644
index 00000000..fe447c52
--- /dev/null
+++ b/.github/workflows/deploy_prod_lambda_only.yml
@@ -0,0 +1,28 @@
+name: Prod Deploy Lambda Only
+# This allows us to deploy only lambda changes to prod found in the version s3 folder (only).
+
+on:
+  workflow_dispatch:
+
+permissions:
+  id-token: write
+  contents: write
+
+jobs:
+  get_build:
+    uses: ./.github/workflows/build_get.yml
+    with:
+      environment: ci
+      version: 0.5.1
+
+  deploy:
+    uses: ./.github/workflows/deploy.yml
+    needs:
+      - get_build
+    with:
+      environment: prod
+      lambda_version: ${{ needs.get_build.outputs.lambda_version }}
+      lambda_bucket: ${{ needs.get_build.outputs.lambda_bucket }}
+      matrix: ${{ needs.get_build.outputs.lambda_version_files }}
+      # we can also define a lambda-only deployment here if needed, as below
+      # matrix: '["api"]'
\ No newline at end of file
diff --git a/README.md b/README.md
index 8eb2bde0..f13a8d2e 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,8 @@ module "lambda_example" {
 - we can handle an initial lag while lambda warms up/boots
 ```hcl
 provisioned_config = {
-  fixed = 0
+  fixed                = 0
+  reserved_concurrency = 2 # only allow 2 concurrent executions THIS ALSO SERVES AS A LIMIT TO AVOID THROTTLING
 }
 ```
@@ -45,7 +46,8 @@ provisioned_config = {
 - we never want lag due to warm up and can predict traffic
 ```hcl
 provisioned_config = {
-  fixed = 1
+  fixed                = 10
+  reserved_concurrency = 50
 }
 ```
@@ -65,12 +67,19 @@ provisioned_config = {
 }
 }
 ```
+- before scaling the lambda alias will match the minimum value
+![a](docs/lambda-config-before.png)
+- when the trigger percent is exceeded the lambda moves into `In progress (1/2)` state as an additional provisioned lambda is added.
+![a](docs/lamba-scaling-up.png) +- after scaling the lambda alias will show an additional provisioned lambda +![a](docs/lambda-config-after.png) + ## đŸšĻ types of lambda deploy ```hcl module "lambda_example" { - source = "../lambda" + source = "../_shared/lambda" ... deployment_config = var.your_deployment_config } @@ -109,3 +118,41 @@ deployment_config = { percentage = 10 interval_minutes = 1 } +``` + +## đŸ”Ĩâ†Šī¸ deployment roll-back + +- use cloudwatch metrics and alarms to automatically roll-back a deployment +- create a [cloudwatch_metric_alarm](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) resource and pass in as per below + +```hcl +module "lambda_example" { + source = "../_shared/lambda" + ... + codedeploy_alarm_names = [ + local.api_5xx_alarm_name + ] +} +``` +- if the alarm triggers during a deployment you will see the below in the CI + +``` +đŸ“Ļ Running: lambda-deploy +🚀 Started deployment: d-40UUQH3DF +Attempt 1: Deployment status is InProgress +Attempt 2: Deployment status is InProgress +Attempt 3: Deployment status is InProgress +Attempt 4: Deployment status is InProgress +Attempt 5: Deployment status is Stopped +❌ Deployment d-40UUQH3DF failed or was stopped. +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +| GetDeployment | ++--------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ErrorCode | ALARM_ACTIVE | +| ErrorMessage| One or more alarms have been activated according to the Amazon CloudWatch metrics you selected, and the affected deployments have been stopped. 
Activated alarms: | +| Status | Stopped | ++--------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +error: Recipe `lambda-deploy` failed with exit code 1 +Error: Process completed with exit code 1. + +``` \ No newline at end of file diff --git a/docs/lamba-scaling-up.png b/docs/lamba-scaling-up.png new file mode 100644 index 00000000..21270203 Binary files /dev/null and b/docs/lamba-scaling-up.png differ diff --git a/docs/lambda-config-after.png b/docs/lambda-config-after.png new file mode 100644 index 00000000..14828714 Binary files /dev/null and b/docs/lambda-config-after.png differ diff --git a/docs/lambda-config-before.png b/docs/lambda-config-before.png new file mode 100644 index 00000000..6659eff9 Binary files /dev/null and b/docs/lambda-config-before.png differ diff --git a/infra/live/dev/aws/api/terragrunt.hcl b/infra/live/dev/aws/api/terragrunt.hcl index 4fc561db..37e4cc2f 100644 --- a/infra/live/dev/aws/api/terragrunt.hcl +++ b/infra/live/dev/aws/api/terragrunt.hcl @@ -2,6 +2,12 @@ include { path = find_in_parent_folders("root.hcl") } +inputs = { + api_5xx_alarm_threshold = 20.0 + api_5xx_alarm_evaluation_periods = 1 + api_5xx_alarm_datapoints_to_alarm = 1 +} + terraform { source = "../../../../modules//aws//api" } diff --git a/infra/live/prod/aws/api/terragrunt.hcl b/infra/live/prod/aws/api/terragrunt.hcl index 4fc561db..35f660c1 100644 --- a/infra/live/prod/aws/api/terragrunt.hcl +++ b/infra/live/prod/aws/api/terragrunt.hcl @@ -2,6 +2,12 @@ include { path = find_in_parent_folders("root.hcl") } +inputs = { + api_5xx_alarm_threshold = 5.0 + api_5xx_alarm_evaluation_periods = 1 + api_5xx_alarm_datapoints_to_alarm = 1 +} + terraform { source = "../../../../modules//aws//api" } diff --git a/infra/modules/aws/_shared/lambda/locals.tf 
b/infra/modules/aws/_shared/lambda/locals.tf index c7741a95..fac9ddb3 100644 --- a/infra/modules/aws/_shared/lambda/locals.tf +++ b/infra/modules/aws/_shared/lambda/locals.tf @@ -4,7 +4,7 @@ locals { compute_platform = "Lambda" lambda_bootstrap_zip_key = "bootstrap/bootstrap-lambda.zip" - lambda_name = "${var.environment}-${var.project_name}-${var.lambda_name}" + lambda_name = var.lambda_name deploy_all_at_once_type = "AllAtOnce" deploy_canary_type = "TimeBasedCanary" @@ -15,11 +15,18 @@ locals { canary = local.deploy_canary_type linear = local.deploy_linear_type } + deploy_strategy = local.deploy_config_type_map[var.deployment_config.strategy] deploy_config = { - type = local.deploy_config_type_map[var.deployment_config.strategy] + type = local.deploy_strategy percent = var.deployment_config.percentage minutes = var.deployment_config.interval_minutes } + deploy_config_suffix = lower(( + var.deployment_config.strategy == "all_at_once" + ? local.deploy_strategy + : "${local.deploy_strategy}-${local.deploy_config.percent}-${local.deploy_config.minutes}" + )) + deployment_config_name = "${local.lambda_name}-deploy-${local.deploy_config_suffix}" fixed_mode = try(var.provisioned_config.fixed != null, true) && try(var.provisioned_config.fixed > 0, false) auto_scale_mode = try(var.provisioned_config.auto_scale != null, false) diff --git a/infra/modules/aws/_shared/lambda/main.tf b/infra/modules/aws/_shared/lambda/main.tf index 44d5f246..e91b01a4 100644 --- a/infra/modules/aws/_shared/lambda/main.tf +++ b/infra/modules/aws/_shared/lambda/main.tf @@ -4,7 +4,7 @@ resource "aws_iam_role" "iam_for_lambda" { } resource "aws_iam_policy" "lambda_cloudwatch_logs" { - name = "${var.project_name}-${var.environment}-lambda-cloudwatch-logs" + name = "${local.lambda_name}-logs" policy = data.aws_iam_policy_document.lambda_cloudwatch_logs.json } @@ -108,7 +108,7 @@ resource "aws_iam_role_policy" "cd_lambda" { } resource "aws_codedeploy_deployment_config" "lambda_config" { - 
deployment_config_name = "${local.lambda_name}-deploy-config" + deployment_config_name = local.deployment_config_name compute_platform = local.compute_platform traffic_routing_config { @@ -133,8 +133,10 @@ resource "aws_codedeploy_deployment_config" "lambda_config" { } resource "aws_codedeploy_deployment_group" "dg" { + depends_on = [aws_codedeploy_deployment_config.lambda_config] # to prevent DeploymentConfigInUseException + app_name = aws_codedeploy_app.app.name - deployment_group_name = "${local.lambda_name}-dg" + deployment_group_name = "${local.deployment_config_name}-dg" service_role_arn = aws_iam_role.code_deploy_role.arn deployment_style { @@ -142,12 +144,24 @@ resource "aws_codedeploy_deployment_group" "dg" { deployment_option = "WITH_TRAFFIC_CONTROL" } - deployment_config_name = aws_codedeploy_deployment_config.lambda_config.deployment_config_name + deployment_config_name = local.deployment_config_name auto_rollback_configuration { enabled = true events = ["DEPLOYMENT_FAILURE", "DEPLOYMENT_STOP_ON_ALARM"] } + + dynamic "alarm_configuration" { + for_each = length(var.codedeploy_alarm_names) > 0 ? 
[1] : [] + content { + enabled = true + alarms = var.codedeploy_alarm_names + } + } + + lifecycle { + create_before_destroy = true # to prevent DeploymentConfigInUseException + } } resource "aws_appautoscaling_target" "pc_target" { diff --git a/infra/modules/aws/_shared/lambda/variables.tf b/infra/modules/aws/_shared/lambda/variables.tf index ada04226..96a2aec1 100644 --- a/infra/modules/aws/_shared/lambda/variables.tf +++ b/infra/modules/aws/_shared/lambda/variables.tf @@ -42,6 +42,12 @@ variable "additional_policy_arns" { default = [] } +variable "codedeploy_alarm_names" { + description = "Optional list of CloudWatch alarm names that trigger CodeDeploy rollback" + type = list(string) + default = [] +} + variable "deployment_config" { description = "Traffic shifting: all_at_once | canary | linear" type = object({ diff --git a/infra/modules/aws/api/local.tf b/infra/modules/aws/api/local.tf new file mode 100644 index 00000000..aade786d --- /dev/null +++ b/infra/modules/aws/api/local.tf @@ -0,0 +1,5 @@ +locals { + lambda_name = "${var.environment}-${var.project_name}-api" + apigw_http_5xx_metric = "5xx" + api_5xx_alarm_name = "${local.lambda_name}-api-v2-5xx-rate-critical" +} \ No newline at end of file diff --git a/infra/modules/aws/api/main.tf b/infra/modules/aws/api/main.tf index fd76eff9..93c91b10 100644 --- a/infra/modules/aws/api/main.tf +++ b/infra/modules/aws/api/main.tf @@ -5,36 +5,33 @@ module "lambda_api" { environment = var.environment lambda_bucket = var.lambda_bucket - lambda_name = "api" + lambda_name = local.lambda_name environment_variables = { DEBUG_DELAY_MS = 500 } deployment_config = { - strategy = "all_at_once" + strategy = "canary" + percentage = 10 + interval_minutes = 3 # this is > the alarm evaluation period to ensure we catch the alarm if it triggers } + codedeploy_alarm_names = [ + local.api_5xx_alarm_name + ] + provisioned_config = { - fixed = 0 # cold starts only - } + auto_scale = { + max = 2 + min = 1 # always have 1 lambda ready to go 
+ trigger_percent = 20 + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } - # provisioned_config = { - # fixed = 1 # always have 1 lambda ready to go - # reserved_concurrency = 2 # only allow 2 concurrent executions THIS ALSO SERVES AS A LIMIT TO AVOID THROTTLING - # } - - # provisioned_config = { - # auto_scale = { - # max = 2 - # min = 1 # always have 1 lambda ready to go - # trigger_percent = 20 - # scale_in_cooldown_seconds = 60 - # scale_out_cooldown_seconds = 60 - # } - - # reserved_concurrency = 10 # limit the amount of concurrent executions to avoid throttling, but allow some bursting - # } + reserved_concurrency = 10 # limit the amount of concurrent executions to avoid throttling, but allow some bursting + } } resource "aws_apigatewayv2_api" "http_api" { @@ -74,3 +71,62 @@ resource "aws_lambda_permission" "allow_invoke" { principal = "apigateway.amazonaws.com" source_arn = "${aws_apigatewayv2_api.http_api.execution_arn}/*/*" # all routes/stages } + +resource "aws_cloudwatch_metric_alarm" "api_5xx_rate" { + alarm_name = local.api_5xx_alarm_name + alarm_description = "HTTP API (v2) 5xx error rate > ${var.api_5xx_alarm_threshold}% for ${var.api_5xx_alarm_evaluation_periods} minute(s) ${var.api_5xx_alarm_datapoints_to_alarm} times" + actions_enabled = true + + comparison_operator = "GreaterThanThreshold" + threshold = var.api_5xx_alarm_threshold # This is the value your metric is compared against + evaluation_periods = var.api_5xx_alarm_evaluation_periods # This is how many consecutive periods CloudWatch looks at when deciding the alarm state. + datapoints_to_alarm = var.api_5xx_alarm_datapoints_to_alarm # This is how many of those evaluated periods must be breaching to trigger ALARM. 
+ treat_missing_data = "notBreaching" + + # + # Metric math: (5xx / count) * 100 + # Guarded to avoid NaN/Inf when count is 0 or very low + # + metric_query { + id = "e" + label = "5xxErrorRate" + return_data = true + expression = "IF(mcount < 1, 0, (m5xx / mcount) * 100)" + } + + # + # API Gateway v2 – 5XX errors + # + metric_query { + id = "m5xx" + metric { + namespace = "AWS/ApiGateway" + metric_name = local.apigw_http_5xx_metric + stat = "Sum" + period = 60 + + dimensions = { + ApiId = aws_apigatewayv2_api.http_api.id + Stage = aws_apigatewayv2_stage.default.name + } + } + } + + # + # API Gateway v2 – total request count + # + metric_query { + id = "mcount" + metric { + namespace = "AWS/ApiGateway" + metric_name = "Count" + stat = "Sum" + period = 60 + + dimensions = { + ApiId = aws_apigatewayv2_api.http_api.id + Stage = aws_apigatewayv2_stage.default.name + } + } + } +} diff --git a/infra/modules/aws/api/variables.tf b/infra/modules/aws/api/variables.tf index 6088d0c3..a75acdf9 100644 --- a/infra/modules/aws/api/variables.tf +++ b/infra/modules/aws/api/variables.tf @@ -14,3 +14,18 @@ variable "lambda_bucket" { description = "Lambda bucket where the code zip(s) are uploaded to" } ### end of static vars set in root.hcl ### + +variable "api_5xx_alarm_threshold" { + type = number + description = "The threshold for the API 5xx error rate alarm" +} + +variable "api_5xx_alarm_evaluation_periods" { + type = number + description = "The number of consecutive periods CloudWatch looks at when deciding the alarm state" +} + +variable "api_5xx_alarm_datapoints_to_alarm" { + type = number + description = "The number of evaluated periods that must be breaching to trigger ALARM" +} \ No newline at end of file diff --git a/infra/modules/aws/consumer/local.tf b/infra/modules/aws/consumer/local.tf new file mode 100644 index 00000000..2b78c191 --- /dev/null +++ b/infra/modules/aws/consumer/local.tf @@ -0,0 +1,4 @@ +locals { + sqs_chunk_size = 5 + lambda_name = 
"${var.environment}-${var.project_name}-consumer" +} \ No newline at end of file diff --git a/infra/modules/aws/consumer/main.tf b/infra/modules/aws/consumer/main.tf index 9a4fb84e..13e1458d 100644 --- a/infra/modules/aws/consumer/main.tf +++ b/infra/modules/aws/consumer/main.tf @@ -5,7 +5,12 @@ module "lambda_consumer" { environment = var.environment lambda_bucket = var.lambda_bucket - lambda_name = "consumer" + lambda_name = local.lambda_name + + environment_variables = { + DEBUG_DELAY_MS = 500 + CHUNK_SIZE = local.sqs_chunk_size + } additional_policy_arns = [ module.sqs_queue.sqs_queue_read_policy_arn @@ -16,26 +21,19 @@ module "lambda_consumer" { } provisioned_config = { - fixed = 0 # cold starts only + sqs_scale = { + min = 1 + max = 5 + visible_messages = 10 + queue_name = module.sqs_queue.sqs_queue_name + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } } - - # provisioned_config = { - # fixed = 1 # always have 1 lambda ready to go - # reserved_concurrency = 2 # only allow 2 concurrent executions THIS ALSO SERVES AS A LIMIT TO AVOID THROTTLING - # } - - # provisioned_config = { - # sqs_scale = { - # min = 1 - # max = 5 - # visible_messages = 100 - # queue_name = module.sqs_queue.sqs_queue_name - # scale_in_cooldown_seconds = 60 - # scale_out_cooldown_seconds = 60 - # } - # } } +# configure a deadletter queue (DLQ) for the SQS queue used by the Lambda consumer + module "sqs_queue" { source = "../_shared/sqs" @@ -46,8 +44,8 @@ resource "aws_lambda_event_source_mapping" "sqs" { event_source_arn = module.sqs_queue.sqs_queue_arn function_name = module.lambda_consumer.function_name - batch_size = 500 + batch_size = local.sqs_chunk_size maximum_batching_window_in_seconds = 10 function_response_types = ["ReportBatchItemFailures"] -} \ No newline at end of file +} diff --git a/infra/modules/aws/consumer/outputs.tf b/infra/modules/aws/consumer/outputs.tf index 21836daf..c9514ec4 100644 --- a/infra/modules/aws/consumer/outputs.tf +++ 
b/infra/modules/aws/consumer/outputs.tf @@ -13,3 +13,7 @@ output "lambda_function_name" { output "lambda_alias_name" { value = module.lambda_consumer.alias_name } + +output "sqs_queue_url" { + value = module.sqs_queue.sqs_queue_url +} diff --git a/justfile b/justfile index 87c7bbb1..c387ab19 100644 --- a/justfile +++ b/justfile @@ -270,6 +270,11 @@ lambda-deploy: #!/usr/bin/env bash set -euo pipefail + if [[ -z "$FUNCTION_NAME" ]]; then + echo "❌ FUNCTION_NAME environment variable is not set." + exit 1 + fi + if [[ -z "$APP_SPEC_KEY" ]]; then echo "❌ APP_SPEC_KEY environment variable is not set." exit 1 @@ -372,37 +377,24 @@ lambda-prune: aws lambda delete-function --function-name "$FUNCTION_NAME" --qualifier "$v" --region "$AWS_REGION" done -watch-lambda-autoscale: + +test-api-deploy-500s: #!/usr/bin/env bash set -euo pipefail - DURATION=180 - CONCURRENCY=20 - URL=https://slt6v1u8n4.execute-api.eu-west-2.amazonaws.com - - END_TIME=$(( $(date +%s) + $DURATION )) - - echo "🚀 Lambda autoscaling test" - echo " URL: $URL" - echo " Duration: $DURATION seconds" - echo " Concurrency: $CONCURRENCY" - echo - - while [[ $(date +%s) -lt "$END_TIME" ]]; do - seq 1 $CONCURRENCY \ - | xargs -n1 -P $CONCURRENCY -I{} \ - curl -s "$URL/" \ - | jq -r '.env_id' - done \ - | sort \ - | uniq -c - - echo - echo "🧊 Distinct Lambda environments:" - while [[ $(date +%s) -lt "$END_TIME" ]]; do - seq 1 $CONCURRENCY \ - | xargs -n1 -P $CONCURRENCY -I{} \ - curl -s "$URL/" \ - | jq -r '.env_id' - done \ - | sort \ - | uniq + + if [[ -z "$API_URL" ]]; then + echo "❌ API_URL environment variable is not set." + exit 1 + fi + + echo "Sending requests to $API_URL to trigger 500 errors..." + + END=$((SECONDS+180)) + + while [ $SECONDS -lt $END ]; do + curl -s -o /dev/null "$API_URL/error" + done + + echo "Finished sending requests." 
+
+
diff --git a/lambdas/api/lambda_handler.py b/lambdas/api/lambda_handler.py
index 65528d54..ba6b54e5 100644
--- a/lambdas/api/lambda_handler.py
+++ b/lambdas/api/lambda_handler.py
@@ -3,12 +3,12 @@
 import uuid
 import time
 
-# Runs once per execution environment (cold start)
 ENV_ID = str(uuid.uuid4())[:8]
 BOOT_TIME_MS = int(time.time() * 1000)
 
 DEBUG_DELAY_MS = int(os.getenv("DEBUG_DELAY_MS", "0"))
 
+
 def lambda_handler(event, context):
     print("Received event:", json.dumps(event))
 
@@ -16,6 +16,24 @@ def lambda_handler(event, context):
     if DEBUG_DELAY_MS > 0:
         time.sleep(DEBUG_DELAY_MS / 1000.0)
 
+    # --- Error endpoint: /fail or /error returns 500 ---
+    path = event.get("rawPath") or event.get("path") or ""
+    if path in ("/fail", "/error", "/health/fail"):
+        error_body = {
+            "message": "Forced failure for testing",
+            "env_id": ENV_ID,
+            "request_id": context.aws_request_id,
+        }
+        return {
+            "statusCode": 500,
+            "headers": {
+                "Content-Type": "application/json",
+                "X-Env-Id": ENV_ID,
+            },
+            "body": json.dumps(error_body),
+        }
+
+    # Normal success response
     body = {
         "message": "Hello from Lambda!",
         "env_id": ENV_ID,
diff --git a/lambdas/consumer/lambda_handler.py b/lambdas/consumer/lambda_handler.py
index 163a1c5a..6b80fcfa 100644
--- a/lambdas/consumer/lambda_handler.py
+++ b/lambdas/consumer/lambda_handler.py
@@ -1,8 +1,10 @@
 import json
+import os
+import time
 from typing import List, Dict
 
-CHUNK_SIZE = 50
-
+# CHUNK_SIZE is supplied by Terraform as a Lambda env var (see consumer/main.tf);
+# default matches local.sqs_chunk_size so behavior is unchanged when unset.
+CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "5"))
+DEBUG_DELAY_MS = int(os.getenv("DEBUG_DELAY_MS", "0"))
 
 def chunk(items: List[Dict], size: int):
     """Yield successive chunks from a list."""
@@ -37,6 +39,9 @@ def process_chunk(records: List[Dict]) -> List[str]:
     for record in records:
         try:
             process_message(record)
+            # Optional delay to force concurrency during testing
+            if DEBUG_DELAY_MS > 0:
+                time.sleep(DEBUG_DELAY_MS / 1000.0)
         except Exception as exc:
             print(f"Failed processing message {record['messageId']}: {exc}")
             failed_message_ids.append(record["messageId"])