From 085964ac38c51559cf583e68469cc20c750d4962 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 15:28:17 -0300
Subject: [PATCH 01/24] feat: add dynamic assume role support via
 scope-configurations provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When assume_role.arn is set in the scope-configurations provider, the agent's
base credentials (IRSA) are used only to call sts:AssumeRole; all subsequent
AWS calls (CLI + Tofu) run under the target role. Falls back to ASSUME_ROLE_ARN_DEFAULT
in values.yaml if the provider key is absent. When neither is set, behavior is
unchanged — pod credentials (IRSA) are used directly.

- New utils/assume_role: sourceable helper that exports temporary credentials
- fetch_scope_configuration: reads assume_role.arn from scope-configurations
  provider and applies the role immediately after config is fetched
- diagnose/build_context: sources assume_role explicitly, because it is the
  only build_context that bypasses fetch_scope_configuration
- values.yaml: documents ASSUME_ROLE_ARN_DEFAULT as fallback config option

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/diagnose/build_context          |  1 +
 lambda/utils/assume_role               | 34 ++++++++++++++++++++++++++
 lambda/utils/fetch_scope_configuration |  9 +++++++
 lambda/values.yaml                     |  7 ++++++
 4 files changed, 51 insertions(+)
 create mode 100755 lambda/utils/assume_role

diff --git a/lambda/diagnose/build_context b/lambda/diagnose/build_context
index 1ef14b6..f234a67 100755
--- a/lambda/diagnose/build_context
+++ b/lambda/diagnose/build_context
@@ -15,6 +15,7 @@ if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
 fi
 
 source "$SERVICE_PATH/utils/lambda_function_name"
+source "$SERVICE_PATH/utils/assume_role"
 
 lambda_info=$(aws lambda get-function --function-name "$LAMBDA_FUNCTION_NAME" --output json 2>/dev/null || echo "{}")
 LAMBDA_FUNCTION_ARN=$(echo "$lambda_info" | jq -r '.Configuration.FunctionArn // ""')
diff --git a/lambda/utils/assume_role b/lambda/utils/assume_role
new file mode 100755
index 0000000..66cfe16
--- /dev/null
+++ b/lambda/utils/assume_role
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Sourceable helper — do NOT execute directly.
+# Reads ASSUME_ROLE_ARN from environment. If set, calls sts:AssumeRole and exports
+# temporary credentials so all subsequent AWS calls (CLI + Tofu) use that role.
+# If empty, does nothing — pod's IRSA handles auth directly.
+#
+# Requires: aws CLI, jq
+# Expects:  ASSUME_ROLE_ARN (exported by fetch_scope_configuration; falls back to ASSUME_ROLE_ARN_DEFAULT)
+#           SCOPE_ID (optional, used for the session name)
+
+_ar_log() {
+  if declare -f log > /dev/null 2>&1; then
+    log "$1" "$2"
+  else
+    echo "$2"
+  fi
+}
+
+if [ -n "${ASSUME_ROLE_ARN:-}" ]; then
+  _ar_log info "   🔑 Assuming role: $ASSUME_ROLE_ARN"
+
+  ASSUMED_CREDS=$(aws sts assume-role \
+    --role-arn     "$ASSUME_ROLE_ARN" \
+    --role-session-name "np-lambda-${SCOPE_ID:-workflow}" \
+    --output json)
+
+  export AWS_ACCESS_KEY_ID=$(echo "$ASSUMED_CREDS"     | jq -r '.Credentials.AccessKeyId')
+  export AWS_SECRET_ACCESS_KEY=$(echo "$ASSUMED_CREDS" | jq -r '.Credentials.SecretAccessKey')
+  export AWS_SESSION_TOKEN=$(echo "$ASSUMED_CREDS"     | jq -r '.Credentials.SessionToken')
+
+  _ar_log info "   ✅ Role assumed successfully"
+else
+  _ar_log debug "   ✅ assume_role=skipped (using pod credentials)"
+fi
diff --git a/lambda/utils/fetch_scope_configuration b/lambda/utils/fetch_scope_configuration
index 63429b7..ab79244 100755
--- a/lambda/utils/fetch_scope_configuration
+++ b/lambda/utils/fetch_scope_configuration
@@ -76,6 +76,11 @@ log debug "   ✅ placeholder_image_uri=$PLACEHOLDER_IMAGE_URI"
 NULL_AGENT_LAYER_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.agent.null_agent_layer_arn // empty')
 log debug "   ✅ null_agent_layer_arn=$NULL_AGENT_LAYER_ARN"
 
+# From scope-configurations category (optional — fallback to env var set in values.yaml)
+ASSUME_ROLE_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.assume_role.arn // empty')
+ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-${ASSUME_ROLE_ARN_DEFAULT:-}}"
+log debug "   ✅ assume_role_arn=${ASSUME_ROLE_ARN:-(not set, using pod credentials)}"
+
 export ALB_PUBLIC_LISTENER_ARN
 export ALB_PRIVATE_LISTENER_ARN
 export VPC_ID
@@ -88,5 +93,9 @@ export HOSTED_PRIVATE_ZONE_ID
 export TOFU_STATE_BUCKET
 export PLACEHOLDER_IMAGE_URI
 export NULL_AGENT_LAYER_ARN
+export ASSUME_ROLE_ARN
+
+# Apply assume role immediately so all subsequent AWS calls run under the target role
+source "$SERVICE_PATH/utils/assume_role"
 
 log info "✨ Scope configuration fetched successfully"
diff --git a/lambda/values.yaml b/lambda/values.yaml
index f8891bd..280e930 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -35,6 +35,13 @@ configuration:
   # ── Null Agent ─────────────────────────────────────────────────────────────
   USE_NULL_AGENT: false
 
+  # ── Assume Role ────────────────────────────────────────────────────────────
+  # IAM role ARN to assume before any AWS operation.
+  # Recommended: set via the scope-configurations provider key assume_role.arn
+  # so it's managed per-account without changing code.
+  # This value is only used if the provider does not supply assume_role.arn.
+  ASSUME_ROLE_ARN_DEFAULT: ""
+
   # ── IAM ────────────────────────────────────────────────────────────────────
   IAM_PROPAGATION_WAIT_SECONDS: 20
 

From d9ff61bbf712c2c39ea3479338aef5bf1afb88c7 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 15:48:56 -0300
Subject: [PATCH 02/24] feat: add requirements module with IAM policies for
 Lambda scope operations

Creates 4 IAM policies covering all AWS operations needed by the lambda scope:
- lambda_policy: Lambda CRUD, versions, aliases, concurrency
- lambda_iam_policy: execution role management (nullplatform-* and np-lambda-*)
- lambda_networking_policy: API Gateway, ALB, Route53
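- lambda_storage_policy: ECR, Secrets Manager, CloudWatch, S3 tfstate

Optionally creates an IAM role assumable by trusted_arns (create_role = true),
or attaches the four policies to an existing role (role_name).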

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/requirements/main.tf      | 299 +++++++++++++++++++++++++++++++
 lambda/requirements/output.tf    |  29 +++
 lambda/requirements/variables.tf |  22 +++
 3 files changed, 350 insertions(+)
 create mode 100644 lambda/requirements/main.tf
 create mode 100644 lambda/requirements/output.tf
 create mode 100644 lambda/requirements/variables.tf

diff --git a/lambda/requirements/main.tf b/lambda/requirements/main.tf
new file mode 100644
index 0000000..1939c7e
--- /dev/null
+++ b/lambda/requirements/main.tf
@@ -0,0 +1,299 @@
+################################################################################
+# IAM role (only when create_role = true)
+################################################################################
+
+resource "aws_iam_role" "nullplatform_lambda_role" {
+  count = var.create_role ? 1 : 0
+  name  = "nullplatform_${var.name}_lambda_role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect    = "Allow"
+        Principal = { AWS = var.trusted_arns }
+        Action    = "sts:AssumeRole"
+      }
+    ]
+  })
+}
+
+################################################################################
+# Policy attachments
+################################################################################
+
+locals {
+  effective_role_name = var.create_role ? aws_iam_role.nullplatform_lambda_role[0].name : var.role_name
+  attach_policies     = var.create_role || var.role_name != null
+}
+
+resource "aws_iam_role_policy_attachment" "lambda" {
+  count      = local.attach_policies ? 1 : 0
+  role       = local.effective_role_name
+  policy_arn = aws_iam_policy.nullplatform_lambda_policy.arn
+}
+
+resource "aws_iam_role_policy_attachment" "lambda_iam" {
+  count      = local.attach_policies ? 1 : 0
+  role       = local.effective_role_name
+  policy_arn = aws_iam_policy.nullplatform_lambda_iam_policy.arn
+}
+
+resource "aws_iam_role_policy_attachment" "lambda_networking" {
+  count      = local.attach_policies ? 1 : 0
+  role       = local.effective_role_name
+  policy_arn = aws_iam_policy.nullplatform_lambda_networking_policy.arn
+}
+
+resource "aws_iam_role_policy_attachment" "lambda_storage" {
+  count      = local.attach_policies ? 1 : 0
+  role       = local.effective_role_name
+  policy_arn = aws_iam_policy.nullplatform_lambda_storage_policy.arn
+}
+
+################################################################################
+# Lambda core policy
+# Manages Lambda functions, versions, aliases, concurrency, and invocations.
+################################################################################
+
+resource "aws_iam_policy" "nullplatform_lambda_policy" {
+  name        = "nullplatform_${var.name}_lambda_policy"
+  description = "Policy for managing Lambda functions provisioned by the scopes-lambda provider"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "lambda:CreateFunction",
+          "lambda:DeleteFunction",
+          "lambda:GetFunction",
+          "lambda:GetFunctionConfiguration",
+          "lambda:GetFunctionConcurrency",
+          "lambda:UpdateFunctionCode",
+          "lambda:UpdateFunctionConfiguration",
+          "lambda:PublishVersion",
+          "lambda:ListVersionsByFunction",
+          "lambda:GetAlias",
+          "lambda:ListAliases",
+          "lambda:CreateAlias",
+          "lambda:UpdateAlias",
+          "lambda:DeleteAlias",
+          "lambda:InvokeFunction",
+          "lambda:PutFunctionConcurrency",
+          "lambda:DeleteFunctionConcurrency",
+          "lambda:PutProvisionedConcurrencyConfig",
+          "lambda:DeleteProvisionedConcurrencyConfig",
+          "lambda:GetProvisionedConcurrencyConfig",
+          "lambda:GetAccountSettings",
+          "lambda:AddPermission",
+          "lambda:RemovePermission",
+          "lambda:TagResource",
+          "lambda:UntagResource",
+          "lambda:ListTags"
+        ]
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+################################################################################
+# IAM management policy
+# Creates and manages Lambda execution roles (scoped to nullplatform roles).
+################################################################################
+
+resource "aws_iam_policy" "nullplatform_lambda_iam_policy" {
+  name        = "nullplatform_${var.name}_lambda_iam_policy"
+  description = "Policy for managing IAM execution roles for Lambda scopes"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "iam:CreateRole",
+          "iam:GetRole",
+          "iam:DeleteRole",
+          "iam:PutRolePolicy",
+          "iam:GetRolePolicy",
+          "iam:DeleteRolePolicy",
+          "iam:ListRolePolicies",
+          "iam:AttachRolePolicy",
+          "iam:DetachRolePolicy",
+          "iam:ListAttachedRolePolicies",
+          "iam:TagRole",
+          "iam:UntagRole",
+          "iam:PassRole"
+        ]
+        Resource = [
+          "arn:aws:iam::*:role/nullplatform-*",
+          "arn:aws:iam::*:role/np-lambda-*"
+        ]
+      },
+      {
+        Effect   = "Allow"
+        Action   = ["sts:GetCallerIdentity"]
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+################################################################################
+# Networking policy
+# API Gateway (HTTP APIs), ALB (target groups + listener rules), Route53 DNS.
+################################################################################
+
+resource "aws_iam_policy" "nullplatform_lambda_networking_policy" {
+  name        = "nullplatform_${var.name}_lambda_networking_policy"
+  description = "Policy for managing API Gateway, ALB, and Route53 for Lambda scopes"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "apigateway:GET",
+          "apigateway:POST",
+          "apigateway:PUT",
+          "apigateway:PATCH",
+          "apigateway:DELETE",
+          "apigateway:TagResource",
+          "apigateway:UntagResource"
+        ]
+        Resource = "*"
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "elasticloadbalancing:CreateTargetGroup",
+          "elasticloadbalancing:DeleteTargetGroup",
+          "elasticloadbalancing:ModifyTargetGroup",
+          "elasticloadbalancing:ModifyTargetGroupAttributes",
+          "elasticloadbalancing:DescribeTargetGroups",
+          "elasticloadbalancing:DescribeTargetGroupAttributes",
+          "elasticloadbalancing:RegisterTargets",
+          "elasticloadbalancing:DeregisterTargets",
+          "elasticloadbalancing:DescribeTargetHealth",
+          "elasticloadbalancing:CreateListenerRule",
+          "elasticloadbalancing:DeleteListenerRule",
+          "elasticloadbalancing:ModifyListenerRule",
+          "elasticloadbalancing:DescribeRules",
+          "elasticloadbalancing:DescribeListeners",
+          "elasticloadbalancing:AddTags",
+          "elasticloadbalancing:RemoveTags"
+        ]
+        Resource = "*"
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "route53:ChangeResourceRecordSets",
+          "route53:GetHostedZone",
+          "route53:ListResourceRecordSets",
+          "route53:ListHostedZones"
+        ]
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+################################################################################
+# Storage & Observability policy
+# ECR (placeholder image), Secrets Manager (deployment parameters),
+# CloudWatch Logs & Metrics, S3 (tfstate bucket).
+################################################################################
+
+resource "aws_iam_policy" "nullplatform_lambda_storage_policy" {
+  name        = "nullplatform_${var.name}_lambda_storage_policy"
+  description = "Policy for ECR, Secrets Manager, CloudWatch, and S3 tfstate for Lambda scopes"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Sid    = "ECR"
+        Effect = "Allow"
+        Action = [
+          "ecr:GetAuthorizationToken",
+          "ecr:CreateRepository",
+          "ecr:DescribeRepositories",
+          "ecr:DescribeImages",
+          "ecr:BatchGetImage",
+          "ecr:GetDownloadUrlForLayer",
+          "ecr:InitiateLayerUpload",
+          "ecr:UploadLayerPart",
+          "ecr:CompleteLayerUpload",
+          "ecr:PutImage",
+          "ecr:BatchCheckLayerAvailability",
+          "ecr:TagResource"
+        ]
+        Resource = "*"
+      },
+      {
+        Sid    = "SecretsManager"
+        Effect = "Allow"
+        Action = [
+          "secretsmanager:CreateSecret",
+          "secretsmanager:PutSecretValue",
+          "secretsmanager:GetSecretValue",
+          "secretsmanager:DescribeSecret",
+          "secretsmanager:ListSecrets",
+          "secretsmanager:DeleteSecret",
+          "secretsmanager:TagResource"
+        ]
+        Resource = "arn:aws:secretsmanager:*:*:secret:nullplatform/*"
+      },
+      {
+        Sid    = "CloudWatchLogs"
+        Effect = "Allow"
+        Action = [
+          "logs:CreateLogGroup",
+          "logs:DeleteLogGroup",
+          "logs:DescribeLogGroups",
+          "logs:CreateLogStream",
+          "logs:PutLogEvents",
+          "logs:FilterLogEvents",
+          "logs:GetLogEvents",
+          "logs:PutRetentionPolicy",
+          "logs:TagLogGroup"
+        ]
+        Resource = "*"
+      },
+      {
+        Sid    = "CloudWatchMetrics"
+        Effect = "Allow"
+        Action = [
+          "cloudwatch:GetMetricStatistics",
+          "cloudwatch:ListMetrics",
+          "cloudwatch:GetMetricData"
+        ]
+        Resource = "*"
+      },
+      {
+        Sid    = "S3Tfstate"
+        Effect = "Allow"
+        Action = [
+          "s3:CreateBucket",
+          "s3:HeadBucket",
+          "s3:PutBucketVersioning",
+          "s3:ListBucket",
+          "s3:ListBucketVersions",
+          "s3:GetObject",
+          "s3:PutObject",
+          "s3:DeleteObject",
+          "s3:DeleteObjectVersion"
+        ]
+        Resource = [
+          "arn:aws:s3:::nullplatform-lambda-tfstate-*",
+          "arn:aws:s3:::nullplatform-lambda-tfstate-*/*"
+        ]
+      }
+    ]
+  })
+}
diff --git a/lambda/requirements/output.tf b/lambda/requirements/output.tf
new file mode 100644
index 0000000..a3570a3
--- /dev/null
+++ b/lambda/requirements/output.tf
@@ -0,0 +1,29 @@
+output "lambda_policy_arn" {
+  description = "ARN of the Lambda core management policy"
+  value       = aws_iam_policy.nullplatform_lambda_policy.arn
+}
+
+output "lambda_iam_policy_arn" {
+  description = "ARN of the IAM execution role management policy"
+  value       = aws_iam_policy.nullplatform_lambda_iam_policy.arn
+}
+
+output "lambda_networking_policy_arn" {
+  description = "ARN of the networking policy (API GW + ALB + Route53)"
+  value       = aws_iam_policy.nullplatform_lambda_networking_policy.arn
+}
+
+output "lambda_storage_policy_arn" {
+  description = "ARN of the storage & observability policy (ECR + SM + CW + S3)"
+  value       = aws_iam_policy.nullplatform_lambda_storage_policy.arn
+}
+
+output "role_arn" {
+  description = "ARN of the IAM role created by this module. Empty string when create_role is false."
+  value       = var.create_role ? aws_iam_role.nullplatform_lambda_role[0].arn : ""
+}
+
+output "role_name" {
+  description = "Name of the IAM role created by this module. Empty string when create_role is false."
+  value       = var.create_role ? aws_iam_role.nullplatform_lambda_role[0].name : ""
+}
diff --git a/lambda/requirements/variables.tf b/lambda/requirements/variables.tf
new file mode 100644
index 0000000..d8b8298
--- /dev/null
+++ b/lambda/requirements/variables.tf
@@ -0,0 +1,22 @@
+variable "name" {
+  description = "Unique identifier for policy naming. Must be unique per AWS account (IAM policy names are account-global). Example: \"prod-us-east-1\"."
+  type        = string
+}
+
+variable "create_role" {
+  description = "When true, creates a new IAM role and attaches all policies to it. The role will allow the ARNs in trusted_arns to assume it via sts:AssumeRole."
+  type        = bool
+  default     = false
+}
+
+variable "role_name" {
+  description = "Existing IAM role name to attach the Lambda policies to. Ignored when create_role is true."
+  type        = string
+  default     = null
+}
+
+variable "trusted_arns" {
+  description = "List of IAM principal ARNs allowed to assume the role. Only used when create_role is true."
+  type        = list(string)
+  default     = []
+}

From 52cba87e3ced810003281e3b64b09bd3c265c0d5 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 15:51:00 -0300
Subject: [PATCH 03/24] chore: set ASSUME_ROLE_ARN_DEFAULT for testing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lambda/values.yaml b/lambda/values.yaml
index 280e930..629f30e 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -40,7 +40,7 @@ configuration:
   # Recommended: set via the scope-configurations provider key assume_role.arn
   # so it's managed per-account without changing code.
   # This value is only used if the provider does not supply assume_role.arn.
-  ASSUME_ROLE_ARN_DEFAULT: ""
+  ASSUME_ROLE_ARN_DEFAULT: "arn:aws:iam::235494813897:role/nullplatform_aws-services-cluster_lambda_role"
 
   # ── IAM ────────────────────────────────────────────────────────────────────
   IAM_PROPAGATION_WAIT_SECONDS: 20

From d49d2b983a2db8bd980a92173a2fd865316ee6a1 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 15:56:34 -0300
Subject: [PATCH 04/24] fix: correct nullplatform provider version constraint
 in specs/tofu

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/specs/tofu/provider.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lambda/specs/tofu/provider.tf b/lambda/specs/tofu/provider.tf
index ef613db..51d4024 100644
--- a/lambda/specs/tofu/provider.tf
+++ b/lambda/specs/tofu/provider.tf
@@ -2,7 +2,7 @@ terraform {
   required_providers {
     nullplatform = {
       source  = "nullplatform/nullplatform"
-      version = "0.0.87, < 0.1.0"
+      version = ">= 0.0.90, < 0.1.0"
     }
     http = {
       source  = "hashicorp/http"

From 33842ec3194eee25162bddc5669cd0035a18bcd7 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 16:20:09 -0300
Subject: [PATCH 05/24] fix: surface sts:AssumeRole errors to stdout for
 visibility in NP logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/utils/assume_role | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lambda/utils/assume_role b/lambda/utils/assume_role
index 66cfe16..0c851de 100755
--- a/lambda/utils/assume_role
+++ b/lambda/utils/assume_role
@@ -19,10 +19,17 @@ _ar_log() {
 if [ -n "${ASSUME_ROLE_ARN:-}" ]; then
   _ar_log info "   🔑 Assuming role: $ASSUME_ROLE_ARN"
 
-  ASSUMED_CREDS=$(aws sts assume-role \
+  _ar_sts_error=$(mktemp)
+  if ! ASSUMED_CREDS=$(aws sts assume-role \
     --role-arn     "$ASSUME_ROLE_ARN" \
     --role-session-name "np-lambda-${SCOPE_ID:-workflow}" \
-    --output json)
+    --output json 2>"$_ar_sts_error"); then
+    _ar_log info "ERROR: sts:AssumeRole failed for $ASSUME_ROLE_ARN"
+    _ar_log info "$(cat "$_ar_sts_error")"
+    rm -f "$_ar_sts_error"
+    return 1
+  fi
+  rm -f "$_ar_sts_error"
 
   export AWS_ACCESS_KEY_ID=$(echo "$ASSUMED_CREDS"     | jq -r '.Credentials.AccessKeyId')
   export AWS_SECRET_ACCESS_KEY=$(echo "$ASSUMED_CREDS" | jq -r '.Credentials.SecretAccessKey')

From 14d3ad54b4ee082c44037b4b1f746f0b45c35705 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 16:43:05 -0300
Subject: [PATCH 06/24] fix: use exact PLACEHOLDER_IMAGE_URI when explicitly
 set, skip arch suffix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When PLACEHOLDER_IMAGE_URI is set in values.yaml the operator has already
chosen the exact tag — no architecture suffix should be appended.
Sets the default to :latest (no arch suffix) for this deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/scope/scripts/resolve_placeholder_image | 10 ++++++++--
 lambda/values.yaml                             |  5 +++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/lambda/scope/scripts/resolve_placeholder_image b/lambda/scope/scripts/resolve_placeholder_image
index ca6a1a1..c103869 100755
--- a/lambda/scope/scripts/resolve_placeholder_image
+++ b/lambda/scope/scripts/resolve_placeholder_image
@@ -35,14 +35,20 @@ fi
 # ── Image placeholder path ────────────────────────────────────────────────────
 log info "🔍 Resolving placeholder image URI..."
 
-placeholder_image_base="${PLACEHOLDER_IMAGE_URI:-public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest}"
+default_image_base="public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest"
+placeholder_image_base="${PLACEHOLDER_IMAGE_URI:-$default_image_base}"
 architecture="${ARCHITECTURE:-arm64}"
 
 # Lambda uses "x86_64" but images are tagged with Docker convention "amd64"
 arch_tag="${architecture}"
 [ "$architecture" = "x86_64" ] && arch_tag="amd64"
 
-if [[ "$placeholder_image_base" == *":"* ]]; then
+# Only append the architecture suffix when using the default image.
+# If PLACEHOLDER_IMAGE_URI is explicitly set, use it as-is — the operator
+# already chose the exact tag they want.
+if [ -n "${PLACEHOLDER_IMAGE_URI:-}" ]; then
+  placeholder_image_uri="$placeholder_image_base"
+elif [[ "$placeholder_image_base" == *":"* ]]; then
   placeholder_image_uri="${placeholder_image_base}-${arch_tag}"
 else
   placeholder_image_uri="${placeholder_image_base}:latest-${arch_tag}"
diff --git a/lambda/values.yaml b/lambda/values.yaml
index 629f30e..026184f 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -35,6 +35,11 @@ configuration:
   # ── Null Agent ─────────────────────────────────────────────────────────────
   USE_NULL_AGENT: false
 
+  # ── Placeholder image ──────────────────────────────────────────────────────
+  # Set to the exact image URI to use as-is (no architecture suffix appended).
+  # Leave empty to let the script auto-select latest-arm64 / latest-amd64.
+  PLACEHOLDER_IMAGE_URI: "public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest"
+
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.
   # Recommended: set via the scope-configurations provider key assume_role.arn

From fc8bc760f83316b69a07fb2eb3c32feb91da02c5 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 16:54:38 -0300
Subject: [PATCH 07/24] fix: remove automatic arch suffix from placeholder
 image URI

The public ECR image only exists as :latest without architecture-specific
tags. Remove the -arm64/-amd64 append logic from the default path.
Users who publish arch-specific images can set PLACEHOLDER_IMAGE_URI
explicitly to the full tag they need.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../scope/scripts/resolve_placeholder_image   | 19 +++++++------------
 lambda/values.yaml                            |  5 -----
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/lambda/scope/scripts/resolve_placeholder_image b/lambda/scope/scripts/resolve_placeholder_image
index c103869..c196631 100755
--- a/lambda/scope/scripts/resolve_placeholder_image
+++ b/lambda/scope/scripts/resolve_placeholder_image
@@ -35,23 +35,18 @@ fi
 # ── Image placeholder path ────────────────────────────────────────────────────
 log info "🔍 Resolving placeholder image URI..."
 
-default_image_base="public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest"
-placeholder_image_base="${PLACEHOLDER_IMAGE_URI:-$default_image_base}"
+placeholder_image_base="${PLACEHOLDER_IMAGE_URI:-public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest}"
 architecture="${ARCHITECTURE:-arm64}"
 
-# Lambda uses "x86_64" but images are tagged with Docker convention "amd64"
-arch_tag="${architecture}"
-[ "$architecture" = "x86_64" ] && arch_tag="amd64"
+log debug "   📋 architecture=$architecture"
 
-# Only append the architecture suffix when using the default image.
-# If PLACEHOLDER_IMAGE_URI is explicitly set, use it as-is — the operator
-# already chose the exact tag they want.
-if [ -n "${PLACEHOLDER_IMAGE_URI:-}" ]; then
+# Use the image URI as-is. If PLACEHOLDER_IMAGE_URI is not set, the default
+# :latest tag is used without any architecture suffix — publish arch-specific
+# tags and set PLACEHOLDER_IMAGE_URI explicitly if needed.
+if [[ "$placeholder_image_base" == *":"* ]]; then
   placeholder_image_uri="$placeholder_image_base"
-elif [[ "$placeholder_image_base" == *":"* ]]; then
-  placeholder_image_uri="${placeholder_image_base}-${arch_tag}"
 else
-  placeholder_image_uri="${placeholder_image_base}:latest-${arch_tag}"
+  placeholder_image_uri="${placeholder_image_base}:latest"
 fi
 
 log debug "   📋 architecture=$architecture"
diff --git a/lambda/values.yaml b/lambda/values.yaml
index 026184f..629f30e 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -35,11 +35,6 @@ configuration:
   # ── Null Agent ─────────────────────────────────────────────────────────────
   USE_NULL_AGENT: false
 
-  # ── Placeholder image ──────────────────────────────────────────────────────
-  # Set to the exact image URI to use as-is (no architecture suffix appended).
-  # Leave empty to let the script auto-select latest-arm64 / latest-amd64.
-  PLACEHOLDER_IMAGE_URI: "public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest"
-
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.
   # Recommended: set via the scope-configurations provider key assume_role.arn

From 3f89288f8b0246beb6011658249c6ef65e904d2a Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 1 Jun 2026 17:42:56 -0300
Subject: [PATCH 08/24] fix: read TOFU_STATE_BUCKET from
 .provider.aws_state_bucket as fallback

The existing scope-configurations provider in this account uses a different
schema (.provider.aws_state_bucket) than our Lambda spec (.state.tofu_state_bucket).
Add fallback to support both schemas without requiring a new provider instance.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lambda/utils/fetch_scope_configuration | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lambda/utils/fetch_scope_configuration b/lambda/utils/fetch_scope_configuration
index ab79244..88f447a 100755
--- a/lambda/utils/fetch_scope_configuration
+++ b/lambda/utils/fetch_scope_configuration
@@ -67,7 +67,7 @@ HOSTED_PRIVATE_ZONE_ID=$(echo "$CLOUD_PROVIDER_CONFIG" | jq -r '.networking.host
 log debug "   ✅ hosted_private_zone_id=$HOSTED_PRIVATE_ZONE_ID"
 
 # From scope-configurations category
-TOFU_STATE_BUCKET=$(echo "$SCOPE_CONFIG" | jq -r '.state.tofu_state_bucket // empty')
+TOFU_STATE_BUCKET=$(echo "$SCOPE_CONFIG" | jq -r '.state.tofu_state_bucket // .provider.aws_state_bucket // empty')
 log debug "   ✅ tofu_state_bucket=$TOFU_STATE_BUCKET"
 
 PLACEHOLDER_IMAGE_URI=$(echo "$SCOPE_CONFIG" | jq -r '.deployment.placeholder_image_uri // empty')

From 97121e48ce48a57544fab74f987c213e2b61a1e3 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 11:04:44 -0300
Subject: [PATCH 09/24] fix(iam): prefix lambda execution role with np-lambda-
 to match requirements policy

The scope execution role was named "${function}-role", which didn't match the
iam:CreateRole/PassRole Resource constraint (arn:aws:iam::*:role/np-lambda-*) in
lambda/requirements, causing AccessDenied at tofu apply. Prefixing aligns the
role name with the policy the assumed role already grants.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/scope/tofu/iam/setup | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lambda/scope/tofu/iam/setup b/lambda/scope/tofu/iam/setup
index 803f4b9..bbc7a40 100755
--- a/lambda/scope/tofu/iam/setup
+++ b/lambda/scope/tofu/iam/setup
@@ -4,7 +4,9 @@ source "$SERVICE_PATH/utils/log"
 
 log info "🔍 Configuring IAM role for deployment..."
 
-iam_role_name="${LAMBDA_FUNCTION_NAME}-role"
+# Prefix with "np-lambda-" so the role name matches the iam:CreateRole/PassRole
+# Resource constraint in lambda/requirements (arn:aws:iam::*:role/np-lambda-*).
+iam_role_name="np-lambda-${LAMBDA_FUNCTION_NAME}-role"
 iam_role_name="${iam_role_name:0:64}"
 
 log debug "   📋 role_name=$iam_role_name"

From 684d9f7fd5aab98a2686e55901fa62b27db9a9a1 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 11:16:55 -0300
Subject: [PATCH 10/24] fix(tofu): surface tofu apply stderr to stdout for
 visibility in NP logs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenTofu writes its "Error:" block to stderr, but the NP workflow executor only
captures stdout — so the real failure reason (e.g. AWS AccessDenied) never showed
in the logs, leaving only a generic "scope creation failed". Redirect stderr to
stdout on the apply and stop sending the script's own error message to stderr.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/scope/tofu/do_tofu | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lambda/scope/tofu/do_tofu b/lambda/scope/tofu/do_tofu
index 30954c3..bb90822 100755
--- a/lambda/scope/tofu/do_tofu
+++ b/lambda/scope/tofu/do_tofu
@@ -177,13 +177,16 @@ if [ "$TOFU_ACTION" = "apply" ]; then
 fi
 
 # Run tofu action
+# Redirect stderr to stdout: OpenTofu writes its "Error:" block to stderr, and the
+# NP workflow executor only captures stdout — without this, the actual failure
+# reason (e.g. an AWS AccessDenied) never reaches the NP logs.
 log info "📝 Running tofu $TOFU_ACTION..."
 tofu_exit_code=0
-tofu -chdir="$TF_WORKING_DIR" "$TOFU_ACTION" -auto-approve -var-file="$TOFU_VAR_FILE" || tofu_exit_code=$?
+tofu -chdir="$TF_WORKING_DIR" "$TOFU_ACTION" -auto-approve -var-file="$TOFU_VAR_FILE" 2>&1 || tofu_exit_code=$?
 if [ $tofu_exit_code -ne 0 ]; then
   echo ""
-  echo "❌ Tofu $TOFU_ACTION failed with exit code $tofu_exit_code" >&2
-  echo "" >&2
+  echo "❌ Tofu $TOFU_ACTION failed with exit code $tofu_exit_code"
+  echo ""
   return 1
 fi
 

From b9e41d3bfdde1586a6f89cbb61e3b76b8a203ac2 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 11:23:53 -0300
Subject: [PATCH 11/24] fix(iam): add modern CloudWatch Logs tagging actions to
 lambda requirements policy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AWS provider (v5) reads log group tags via logs:ListTagsForResource and
manages them via logs:TagResource/UntagResource — the generic resource-tagging
API — but the policy only granted the deprecated logs:TagLogGroup. Creating a
scope's aws_cloudwatch_log_group failed with AccessDenied on ListTagsForResource.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/requirements/main.tf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lambda/requirements/main.tf b/lambda/requirements/main.tf
index 1939c7e..d34f63f 100644
--- a/lambda/requirements/main.tf
+++ b/lambda/requirements/main.tf
@@ -261,7 +261,10 @@ resource "aws_iam_policy" "nullplatform_lambda_storage_policy" {
           "logs:FilterLogEvents",
           "logs:GetLogEvents",
           "logs:PutRetentionPolicy",
-          "logs:TagLogGroup"
+          "logs:TagLogGroup",
+          "logs:ListTagsForResource",
+          "logs:TagResource",
+          "logs:UntagResource"
         ]
         Resource = "*"
       },

From bd26af424b97875ca50fbc16da8a0d06753f3ce6 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 15:15:35 -0300
Subject: [PATCH 12/24] feat(placeholder): make placeholder image configurable
 via PLACEHOLDER_IMAGE_URI_DEFAULT

Adds an env-var fallback for the Lambda placeholder image, mirroring the existing
ASSUME_ROLE_ARN_DEFAULT pattern. Precedence: scope-config
deployment.placeholder_image_uri > PLACEHOLDER_IMAGE_URI_DEFAULT (values.yaml) >
script's hardcoded default. Lets operators point the placeholder at a private ECR
mirror per account without a scope-configuration value or code changes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/utils/fetch_scope_configuration |  4 +++-
 lambda/values.yaml                     | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/lambda/utils/fetch_scope_configuration b/lambda/utils/fetch_scope_configuration
index 88f447a..f002d7f 100755
--- a/lambda/utils/fetch_scope_configuration
+++ b/lambda/utils/fetch_scope_configuration
@@ -71,7 +71,9 @@ TOFU_STATE_BUCKET=$(echo "$SCOPE_CONFIG" | jq -r '.state.tofu_state_bucket // .p
 log debug "   ✅ tofu_state_bucket=$TOFU_STATE_BUCKET"
 
 PLACEHOLDER_IMAGE_URI=$(echo "$SCOPE_CONFIG" | jq -r '.deployment.placeholder_image_uri // empty')
-log debug "   ✅ placeholder_image_uri=$PLACEHOLDER_IMAGE_URI"
+# Fallback to env var set in values.yaml when the provider does not supply it.
+PLACEHOLDER_IMAGE_URI="${PLACEHOLDER_IMAGE_URI:-${PLACEHOLDER_IMAGE_URI_DEFAULT:-}}"
+log debug "   ✅ placeholder_image_uri=${PLACEHOLDER_IMAGE_URI:-(not set, using script default)}"
 
 NULL_AGENT_LAYER_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.agent.null_agent_layer_arn // empty')
 log debug "   ✅ null_agent_layer_arn=$NULL_AGENT_LAYER_ARN"
diff --git a/lambda/values.yaml b/lambda/values.yaml
index 629f30e..f307de9 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -35,6 +35,16 @@ configuration:
   # ── Null Agent ─────────────────────────────────────────────────────────────
   USE_NULL_AGENT: false
 
+  # ── Placeholder image ──────────────────────────────────────────────────────
+  # Container image used to bootstrap the Lambda function at scope creation,
+  # before the first real deployment. MUST live in a private ECR in the same
+  # account/region (Lambda rejects public.ecr.aws images), and be single-arch
+  # matching the scope architecture (publish :latest-amd64 / :latest-arm64).
+  # Recommended: set via the scope-configurations provider key
+  # deployment.placeholder_image_uri so it's managed per-account without code.
+  # This value is only used if the provider does not supply it.
+  PLACEHOLDER_IMAGE_URI_DEFAULT: "235494813897.dkr.ecr.us-east-1.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest-amd64"
+
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.
   # Recommended: set via the scope-configurations provider key assume_role.arn

From 2dc0a3ef0836d525000998870cf106682ad01784 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 15:32:10 -0300
Subject: [PATCH 13/24] fix(deploy): ensure Lambda pull policy on the image's
 ECR repo before update

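Container-image Lambdas require the source ECR repo to grant lambda.amazonaws.com
pull access; without it update-function-code fails with "Lambda does not have
permission to access the ECR image". update_function_code now sets the standard
LambdaECRImageRetrievalPolicy on the image's repo (idempotent, best-effort), and
the requirements role gains ecr:Get/SetRepositoryPolicy. Removes the need to set
the policy by hand per application repo.

Caveat: the script calls set-repository-policy without reading the current
policy first, so a repository policy that already exists on the repo is replaced
by this single statement rather than merged with it. ecr:GetRepositoryPolicy is
granted here but not yet used by the script.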

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/deployment/scripts/update_function_code | 16 ++++++++++++++++
 lambda/requirements/main.tf                    |  4 +++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/lambda/deployment/scripts/update_function_code b/lambda/deployment/scripts/update_function_code
index bd91556..27c68e8 100755
--- a/lambda/deployment/scripts/update_function_code
+++ b/lambda/deployment/scripts/update_function_code
@@ -34,6 +34,22 @@ if [ "$package_type" = "Image" ]; then
   fi
   log debug "   ✅ image_uri=$IMAGE_URI"
 
+  # Ensure the image's ECR repo lets the Lambda service pull it. Container-image
+  # Lambdas require a repository policy granting lambda.amazonaws.com; without it
+  # update-function-code fails with "Lambda does not have permission to access
+  # the ECR image". Idempotent and best-effort (cross-account repos may not be
+  # writable from here — Lambda would then need the policy set on the source side).
+  if [[ "$IMAGE_URI" == *.dkr.ecr.*.amazonaws.com/* ]]; then
+    ecr_region=$(echo "${IMAGE_URI%%/*}" | cut -d. -f4)
+    ecr_repo="${IMAGE_URI#*/}"; ecr_repo="${ecr_repo%%:*}"; ecr_repo="${ecr_repo%%@*}"
+    lambda_pull_policy='{"Version":"2008-10-17","Statement":[{"Sid":"LambdaECRImageRetrievalPolicy","Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":["ecr:BatchGetImage","ecr:GetDownloadUrlForLayer"]}]}'
+    if aws ecr set-repository-policy --repository-name "$ecr_repo" --region "$ecr_region" --policy-text "$lambda_pull_policy" >/dev/null 2>&1; then
+      log debug "   ✅ ensured Lambda pull policy on ECR repo $ecr_repo"
+    else
+      log warn "   ⚠️  could not set Lambda pull policy on ECR repo $ecr_repo (continuing; pull may fail if not already allowed)"
+    fi
+  fi
+
   update_output=$(aws lambda update-function-code \
     --function-name "$LAMBDA_FUNCTION_NAME" \
     --image-uri "$IMAGE_URI" \
diff --git a/lambda/requirements/main.tf b/lambda/requirements/main.tf
index d34f63f..ca10521 100644
--- a/lambda/requirements/main.tf
+++ b/lambda/requirements/main.tf
@@ -231,7 +231,9 @@ resource "aws_iam_policy" "nullplatform_lambda_storage_policy" {
           "ecr:CompleteLayerUpload",
           "ecr:PutImage",
           "ecr:BatchCheckLayerAvailability",
-          "ecr:TagResource"
+          "ecr:TagResource",
+          "ecr:GetRepositoryPolicy",
+          "ecr:SetRepositoryPolicy"
         ]
         Resource = "*"
       },

From c04e9cc67aaa1f4de47fda2630417d7f51cb87db Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 16:03:55 -0300
Subject: [PATCH 14/24] fix(deploy): add missing diagnose.yaml workflow for
 diagnose-deployment action

The diagnose-deployment action mapped to deployment/workflows/diagnose.yaml,
which did not exist, so every auto-diagnose after a failed deployment errored
with "failed to read workflow file". Adds the workflow mirroring the scope
diagnose flow: lean diagnose/build_context + executor over diagnose/checks.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/deployment/workflows/diagnose.yaml | 31 +++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 lambda/deployment/workflows/diagnose.yaml

diff --git a/lambda/deployment/workflows/diagnose.yaml b/lambda/deployment/workflows/diagnose.yaml
new file mode 100644
index 0000000..e9425ea
--- /dev/null
+++ b/lambda/deployment/workflows/diagnose.yaml
@@ -0,0 +1,31 @@
+include:
+  - "$SERVICE_PATH/values.yaml"
+steps:
+  - name: build_context
+    type: script
+    file: "$SERVICE_PATH/diagnose/build_context"
+    output:
+      - name: SCOPE_ID
+        type: environment
+      - name: SCOPE_NRN
+        type: environment
+      - name: LAMBDA_FUNCTION_NAME
+        type: environment
+      - name: LAMBDA_FUNCTION_ARN
+        type: environment
+      - name: LAMBDA_ROLE_ARN
+        type: environment
+      - name: SCOPE_DOMAIN
+        type: environment
+  - name: diagnose
+    type: executor
+    before_each:
+      name: notify_check_running
+      type: script
+      file: "$SERVICE_PATH/diagnose/notify_check_running"
+    after_each:
+      name: notify_check_results
+      type: script
+      file: "$SERVICE_PATH/diagnose/notify_results"
+    folders:
+      - "$SERVICE_PATH/diagnose/checks"

From bbd31a071ae96aaf14ec0715cbb80056e8a38658 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 2 Jun 2026 17:31:06 -0300
Subject: [PATCH 15/24] chore: remove account-specific defaults from
 values.yaml

ASSUME_ROLE_ARN_DEFAULT and PLACEHOLDER_IMAGE_URI_DEFAULT carried a real AWS
account ARN/URI committed for testing. The product repo must stay account-agnostic:
both are now documented as account-specific and provided per-installation via the
scope-configurations provider or the agent's extra_envs (Helm), not hardcoded here.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/values.yaml | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/lambda/values.yaml b/lambda/values.yaml
index f307de9..258ad89 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -40,17 +40,21 @@ configuration:
   # before the first real deployment. MUST live in a private ECR in the same
   # account/region (Lambda rejects public.ecr.aws images), and be single-arch
   # matching the scope architecture (publish :latest-amd64 / :latest-arm64).
-  # Recommended: set via the scope-configurations provider key
-  # deployment.placeholder_image_uri so it's managed per-account without code.
-  # This value is only used if the provider does not supply it.
-  PLACEHOLDER_IMAGE_URI_DEFAULT: "235494813897.dkr.ecr.us-east-1.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest-amd64"
+  # Resolution precedence (see utils/fetch_scope_configuration):
+  #   scope-configurations provider key deployment.placeholder_image_uri
+  #     > PLACEHOLDER_IMAGE_URI_DEFAULT env var (set per-account on the agent)
+  #       > the public default in scope/scripts/resolve_placeholder_image
+  # Account-specific, so it is NOT set here — provide it via the scope-config
+  # or the agent's extra_envs (Helm) for your installation.
 
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.
-  # Recommended: set via the scope-configurations provider key assume_role.arn
-  # so it's managed per-account without changing code.
-  # This value is only used if the provider does not supply assume_role.arn.
-  ASSUME_ROLE_ARN_DEFAULT: "arn:aws:iam::235494813897:role/nullplatform_aws-services-cluster_lambda_role"
+  # Resolution precedence (see utils/fetch_scope_configuration):
+  #   scope-configurations provider key assume_role.arn
+  #     > ASSUME_ROLE_ARN_DEFAULT env var (set per-account on the agent)
+  # Account-specific, so it is NOT set here — provide it via the scope-config
+  # or the agent's extra_envs (Helm) for your installation. If unset, the
+  # agent's own pod credentials are used.
 
   # ── IAM ────────────────────────────────────────────────────────────────────
   IAM_PROPAGATION_WAIT_SECONDS: 20

From e186fa2cb9c7efaca69064c07af46f8588066158 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Wed, 3 Jun 2026 16:27:06 -0300
Subject: [PATCH 16/24] docs: explain placeholder image config and restore
 PLACEHOLDER_IMAGE_URI_DEFAULT knob

Document why Image-based scopes need a private-ECR placeholder and how the
URI is resolved (provider key > PLACEHOLDER_IMAGE_URI_DEFAULT > public default),
including how to publish one and a troubleshooting entry.

Also re-add PLACEHOLDER_IMAGE_URI_DEFAULT to values.yaml as a commented,
account-agnostic template so operators can pick their own image, and normalize
a stray real-looking account ID in a publish comment to the dummy 123456789012.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md                        | 62 ++++++++++++++++++++++++++++++++
 lambda/scope/placeholder/publish |  2 +-
 lambda/values.yaml               |  7 ++--
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 354c37c..d2d282c 100644
--- a/README.md
+++ b/README.md
@@ -288,6 +288,67 @@ LOG_RETENTION_DAYS: 30
 PARAMETERS_STRATEGY: "env"              # or "secretsmanager"
 ```
 
+### Placeholder Image (Scope Bootstrap)
+
+When a scope is created, the Lambda function and its IAM role must exist **before**
+the first real deployment — otherwise aliases, networking, and IAM have nothing to
+attach to. To bootstrap this, `create-scope` provisions a throwaway **placeholder**
+function that the first deployment then overwrites with the real code.
+
+How the placeholder is sourced depends on the scope's **package type**:
+
+- **Zip** — fully self-contained. A minimal handler ships pre-built and
+  base64-encoded in the repo (`scope/placeholder/placeholder_lambda.zip.b64`) and is
+  used automatically. **No configuration needed.**
+- **Image** — the placeholder must be a container image, and this is where
+  `PLACEHOLDER_IMAGE_URI_DEFAULT` comes in.
+
+#### Why `PLACEHOLDER_IMAGE_URI_DEFAULT` is needed for Image scopes
+
+A Lambda function with `PackageType=Image` can only pull from a **private ECR
+repository in the same region** (same account, or cross-account via a repository policy) — Lambda rejects `public.ecr.aws`
+images at function-creation time. The built-in default in
+`scope/scripts/resolve_placeholder_image` points at a public image
+(`public.ecr.aws/nullplatform/aws-lambda/nullplatform-lambda-placeholder:latest`),
+which is fine to *validate* but cannot actually back a real Lambda function.
+
+So for Image-based scopes you **must** mirror a placeholder into your own private
+ECR and point the scope at it. The image must also be **single-arch matching the
+scope architecture** (`-amd64` for `x86_64`, `-arm64` for `arm64`) — Lambda does
+not accept multi-arch manifest lists.
+
+#### Resolution precedence
+
+The placeholder image URI is resolved in this order (first match wins):
+
+1. scope-configurations provider key `deployment.placeholder_image_uri` — per-scope,
+   managed without code
+2. `PLACEHOLDER_IMAGE_URI_DEFAULT` env var — the **account-wide** knob, set in
+   `values.yaml` or via the agent's `extra_envs` (Helm)
+3. the public default in `scope/scripts/resolve_placeholder_image` (validation-only
+   fallback; not usable for real Image functions)
+
+Because the URI is account-specific, `values.yaml` ships it commented out — set it
+once per installation and every Image scope in that account uses it, unless a
+specific scope overrides it via the provider key.
+
+#### Publishing a placeholder image
+
+Use the helper script to build and push the single-arch placeholders to your private
+ECR (it creates the repository if it does not exist):
+
+```bash
+export PLACEHOLDER_IMAGE_REPO=123456789012.dkr.ecr.us-east-1.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder
+lambda/scope/placeholder/publish        # pushes <repo>:latest-arm64 and <repo>:latest-amd64
+```
+
+Then set the URI (matching your scope architecture) in `values.yaml` or the agent's
+`extra_envs`:
+
+```yaml
+PLACEHOLDER_IMAGE_URI_DEFAULT: "123456789012.dkr.ecr.us-east-1.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest-arm64"
+```
+
 ### Resource Naming
 
 | Resource | Format | Example |
@@ -507,6 +568,7 @@ export TOFU_LOCK_TABLE=my-lock-table
 | Issue | Cause | Solution |
 |-------|-------|----------|
 | "Function name too long" | Name exceeds 64 chars | Shorten namespace/application/scope slugs |
+| "Placeholder image not found" | Image scope with no private placeholder published | Run `lambda/scope/placeholder/publish` and set `PLACEHOLDER_IMAGE_URI_DEFAULT` (see [Placeholder Image](#placeholder-image-scope-bootstrap)) |
 | "Provisioned concurrency timeout" | Warmup taking too long | Increase `PROVISIONED_CONCURRENCY_MAX_WAIT_SECONDS` |
 | "ALB listener rule capacity" | Too many rules on ALB | Increase `ALB_LISTENER_RULE_CAPACITY` in values.yaml |
 | "Module not composed" | `MODULES_TO_USE` not updated | Verify setup script appends to `MODULES_TO_USE` |
diff --git a/lambda/scope/placeholder/publish b/lambda/scope/placeholder/publish
index db5f60d..98de7a7 100755
--- a/lambda/scope/placeholder/publish
+++ b/lambda/scope/placeholder/publish
@@ -51,7 +51,7 @@ if ! docker buildx version &>/dev/null; then
 fi
 
 # Extract registry host and region from IMAGE_REPO
-ECR_REGISTRY=$(echo "$IMAGE_REPO" | cut -d/ -f1)       # 688720756067.dkr.ecr.us-east-1.amazonaws.com
+ECR_REGISTRY=$(echo "$IMAGE_REPO" | cut -d/ -f1)       # 123456789012.dkr.ecr.us-east-1.amazonaws.com
 ECR_REGION=$(echo "$ECR_REGISTRY" | cut -d. -f4)        # us-east-1
 ECR_REPO_NAME=$(echo "$IMAGE_REPO" | cut -d/ -f2-)      # aws-lambda/nullplatform-lambda-placeholder
 
diff --git a/lambda/values.yaml b/lambda/values.yaml
index 258ad89..3efea6f 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -44,8 +44,11 @@ configuration:
   #   scope-configurations provider key deployment.placeholder_image_uri
   #     > PLACEHOLDER_IMAGE_URI_DEFAULT env var (set per-account on the agent)
   #       > the public default in scope/scripts/resolve_placeholder_image
-  # Account-specific, so it is NOT set here — provide it via the scope-config
-  # or the agent's extra_envs (Helm) for your installation.
+  # The URI is account-specific, so no real value is committed here. To choose
+  # your own placeholder image per installation, uncomment the line below and
+  # point it at your private ECR (or instead set it via the scope-config or the
+  # agent's extra_envs in Helm).
+  # PLACEHOLDER_IMAGE_URI_DEFAULT: "<account-id>.dkr.ecr.<region>.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest-amd64"
 
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.

From 41ed94486beecf3f77f11584a2a62bfdc8cbb381 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Thu, 4 Jun 2026 11:09:41 -0300
Subject: [PATCH 17/24] refactor(setup): consolidate install tofu under
 lambda/setup and merge requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewer feedback: the standalone requirements/ folder should not sit at the
lambda/ root — all installation-time tofu should live together under a setup module.

- Move lambda/specs/tofu/ -> lambda/setup/ (the operator-applied install module).
- Merge lambda/requirements/ into lambda/setup/ (requirements.tf + outputs.tf, and
  its variables folded into setup/variables.tf); remove the requirements/ folder.
- A single 'tofu apply' in lambda/setup now registers the scope type AND provisions
  the IAM policies. The 4 policies are always created; attaching them stays optional
  via create_role / role_name.
- Add the aws provider (~> 5.0) + provider block to setup/provider.tf and a nullable
  aws_region var (IAM is global). 'name' is now a required setup variable.
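- Update backend key to lambda/setup/terraform.tfstate (previously
  lambda/install/terraform.tfstate); existing installations need to migrate
  their state to the new key before applying.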
- Refresh references: installation.md (cd path + IAM vars table), prerequisites.md
  (setup/main.tf), and the iam/setup comment.

Verified with 'tofu validate' (Success).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/installation.md                        | 11 ++++--
 lambda/prerequisites.md                       |  4 +--
 lambda/requirements/variables.tf              | 22 ------------
 lambda/scope/tofu/iam/setup                   |  2 +-
 lambda/setup/backend.tf                       |  5 +++
 lambda/{specs/tofu => setup}/main.tf          |  0
 .../output.tf => setup/outputs.tf}            |  0
 lambda/{specs/tofu => setup}/provider.tf      |  8 +++++
 .../main.tf => setup/requirements.tf}         |  0
 .../tofu => setup}/terraform.tfvars.example   | 17 +++++++++
 lambda/{specs/tofu => setup}/variables.tf     | 35 +++++++++++++++++++
 lambda/specs/tofu/backend.tf                  |  5 ---
 12 files changed, 77 insertions(+), 32 deletions(-)
 delete mode 100644 lambda/requirements/variables.tf
 create mode 100644 lambda/setup/backend.tf
 rename lambda/{specs/tofu => setup}/main.tf (100%)
 rename lambda/{requirements/output.tf => setup/outputs.tf} (100%)
 rename lambda/{specs/tofu => setup}/provider.tf (78%)
 rename lambda/{requirements/main.tf => setup/requirements.tf} (100%)
 rename lambda/{specs/tofu => setup}/terraform.tfvars.example (72%)
 rename lambda/{specs/tofu => setup}/variables.tf (68%)
 delete mode 100644 lambda/specs/tofu/backend.tf

diff --git a/lambda/installation.md b/lambda/installation.md
index d2d3a08..25dd240 100644
--- a/lambda/installation.md
+++ b/lambda/installation.md
@@ -29,17 +29,24 @@ git clone https://github.com/nullplatform/tofu-modules /root/.np/nullplatform/to
 ### 2. Configure variables
 
 ```bash
-cd lambda/tofu
+cd lambda/setup
 cp terraform.tfvars.example terraform.tfvars
 ```
 
-Edit `terraform.tfvars` with your values:
+This module registers the scope type **and** provisions the IAM policies the
+agent needs to operate Lambda scopes (formerly the separate `requirements`
+module — now consolidated here). Edit `terraform.tfvars` with your values:
 
 | Variable | Required | Description |
 |---|---|---|
 | `nrn` | ✅ | Nullplatform Resource Name (`organization:account`) |
 | `np_api_key` | ✅ | Nullplatform API key |
 | `tags_selectors` | ✅ | Tags to select the agent (e.g. `{ environment = "production" }`) |
+| `name` | ✅ | Unique identifier for IAM policy naming (account-global, e.g. `prod-us-east-1`) |
+| `aws_region` | — | AWS provider region. IAM is global; leave unset to resolve from the environment |
+| `create_role` | — | `true` to create a new IAM role and attach the Lambda policies to it |
+| `trusted_arns` | — | Principal ARNs allowed to assume the created role (with `create_role = true`) |
+| `role_name` | — | Existing IAM role to attach the Lambda policies to (instead of `create_role`) |
 | `github_branch` | — | Branch to fetch specs from (default: `main`) |
 | `repo_path` | — | Path where scopes-lambda is cloned on the agent |
 | `overrides_enabled` | — | Set `true` to enable config overrides from scopes-networking |
diff --git a/lambda/prerequisites.md b/lambda/prerequisites.md
index 9e216f4..8663155 100644
--- a/lambda/prerequisites.md
+++ b/lambda/prerequisites.md
@@ -229,7 +229,7 @@ Agents run in a Kubernetes pod and authenticate to AWS via a **Service Account**
 The IAM policies above let the agent CREATE Lambda functions and target
 groups, but the `create-scope` workflow ALSO depends on three runtime
 artifacts that must exist BEFORE the first scope is created. None are
-auto-created by the bundled `install/tofu/main.tf` today — the operator
+auto-created by the bundled `setup/main.tf` today — the operator
 must provision them.
 
 ### 1. Placeholder image (private ECR)
@@ -383,7 +383,7 @@ This applies to **every** ECR repository that ever stores a Lambda
 image:
 
 1. The placeholder ECR (created during installation, addressed by
-   `lambda/tofu/main.tf` if you use the bundled module — the policy is
+   `lambda/setup/main.tf` if you use the bundled module — the policy is
    already applied there).
 2. **The per-application ECR repositories** that `np asset push`
    creates dynamically when each app does its first build, named
diff --git a/lambda/requirements/variables.tf b/lambda/requirements/variables.tf
deleted file mode 100644
index d8b8298..0000000
--- a/lambda/requirements/variables.tf
+++ /dev/null
@@ -1,22 +0,0 @@
-variable "name" {
-  description = "Unique identifier for policy naming. Must be unique per AWS account (IAM policy names are account-global). Example: \"prod-us-east-1\"."
-  type        = string
-}
-
-variable "create_role" {
-  description = "When true, creates a new IAM role and attaches all policies to it. The role will allow the ARNs in trusted_arns to assume it via sts:AssumeRole."
-  type        = bool
-  default     = false
-}
-
-variable "role_name" {
-  description = "Existing IAM role name to attach the Lambda policies to. Ignored when create_role is true."
-  type        = string
-  default     = null
-}
-
-variable "trusted_arns" {
-  description = "List of IAM principal ARNs allowed to assume the role. Only used when create_role is true."
-  type        = list(string)
-  default     = []
-}
diff --git a/lambda/scope/tofu/iam/setup b/lambda/scope/tofu/iam/setup
index bbc7a40..09fa234 100755
--- a/lambda/scope/tofu/iam/setup
+++ b/lambda/scope/tofu/iam/setup
@@ -5,7 +5,7 @@ source "$SERVICE_PATH/utils/log"
 log info "🔍 Configuring IAM role for deployment..."
 
 # Prefix with "np-lambda-" so the role name matches the iam:CreateRole/PassRole
-# Resource constraint in lambda/requirements (arn:aws:iam::*:role/np-lambda-*).
+# Resource constraint in lambda/setup (arn:aws:iam::*:role/np-lambda-*).
 iam_role_name="np-lambda-${LAMBDA_FUNCTION_NAME}-role"
 iam_role_name="${iam_role_name:0:64}"
 
diff --git a/lambda/setup/backend.tf b/lambda/setup/backend.tf
new file mode 100644
index 0000000..a63cc72
--- /dev/null
+++ b/lambda/setup/backend.tf
@@ -0,0 +1,5 @@
+terraform {
+  backend "s3" {
+    key = "lambda/setup/terraform.tfstate"
+  }
+}
diff --git a/lambda/specs/tofu/main.tf b/lambda/setup/main.tf
similarity index 100%
rename from lambda/specs/tofu/main.tf
rename to lambda/setup/main.tf
diff --git a/lambda/requirements/output.tf b/lambda/setup/outputs.tf
similarity index 100%
rename from lambda/requirements/output.tf
rename to lambda/setup/outputs.tf
diff --git a/lambda/specs/tofu/provider.tf b/lambda/setup/provider.tf
similarity index 78%
rename from lambda/specs/tofu/provider.tf
rename to lambda/setup/provider.tf
index 51d4024..4fa3661 100644
--- a/lambda/specs/tofu/provider.tf
+++ b/lambda/setup/provider.tf
@@ -16,9 +16,17 @@ terraform {
       source  = "hashicorp/null"
       version = "~> 3.2"
     }
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
   }
 }
 
 provider "nullplatform" {
   api_key = var.np_api_key
 }
+
+provider "aws" {
+  region = var.aws_region
+}
diff --git a/lambda/requirements/main.tf b/lambda/setup/requirements.tf
similarity index 100%
rename from lambda/requirements/main.tf
rename to lambda/setup/requirements.tf
diff --git a/lambda/specs/tofu/terraform.tfvars.example b/lambda/setup/terraform.tfvars.example
similarity index 72%
rename from lambda/specs/tofu/terraform.tfvars.example
rename to lambda/setup/terraform.tfvars.example
index b5951da..7861244 100644
--- a/lambda/specs/tofu/terraform.tfvars.example
+++ b/lambda/setup/terraform.tfvars.example
@@ -12,6 +12,23 @@ tags_selectors = {
   environment = "production"
 }
 
+# Unique identifier for IAM policy naming (policy names are account-global).
+name = "prod-us-east-1"
+
+################################################################################
+# IAM permissions (optional)
+################################################################################
+
+# AWS provider region (IAM is global; leave unset to resolve from the environment).
+# aws_region = "us-east-1"
+
+# Attach the Lambda policies to a brand-new role (and trust the given principals)...
+# create_role  = true
+# trusted_arns = ["arn:aws:iam::123456789012:role/my-agent-role"]
+
+# ...or attach them to an existing role instead:
+# role_name = "my-existing-agent-role"
+
 ################################################################################
 # Repository (override if using a fork or private mirror)
 ################################################################################
diff --git a/lambda/specs/tofu/variables.tf b/lambda/setup/variables.tf
similarity index 68%
rename from lambda/specs/tofu/variables.tf
rename to lambda/setup/variables.tf
index 0a1c9ba..52ad529 100644
--- a/lambda/specs/tofu/variables.tf
+++ b/lambda/setup/variables.tf
@@ -95,3 +95,38 @@ variable "overrides_service_path" {
   type        = string
   default     = null
 }
+
+################################################################################
+# IAM permissions (requirements)
+# Policies the agent needs to operate Lambda scopes. IAM is global, but the AWS
+# provider still needs a region to initialize.
+################################################################################
+
+variable "aws_region" {
+  description = "AWS region used to initialize the AWS provider. IAM resources are global; leave null to resolve from the environment (AWS_REGION / profile)."
+  type        = string
+  default     = null
+}
+
+variable "name" {
+  description = "Unique identifier for policy naming. Must be unique per AWS account (IAM policy names are account-global). Example: \"prod-us-east-1\"."
+  type        = string
+}
+
+variable "create_role" {
+  description = "When true, creates a new IAM role and attaches all policies to it. The role will allow the ARNs in trusted_arns to assume it via sts:AssumeRole."
+  type        = bool
+  default     = false
+}
+
+variable "role_name" {
+  description = "Existing IAM role name to attach the Lambda policies to. Ignored when create_role is true."
+  type        = string
+  default     = null
+}
+
+variable "trusted_arns" {
+  description = "List of IAM principal ARNs allowed to assume the role. Only used when create_role is true."
+  type        = list(string)
+  default     = []
+}
diff --git a/lambda/specs/tofu/backend.tf b/lambda/specs/tofu/backend.tf
deleted file mode 100644
index 7330a13..0000000
--- a/lambda/specs/tofu/backend.tf
+++ /dev/null
@@ -1,5 +0,0 @@
-terraform {
-  backend "s3" {
-    key = "lambda/install/terraform.tfstate"
-  }
-}

From 779eef99afc7b328103b3b3b665c10b64d2efe97 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 8 Jun 2026 11:59:27 -0300
Subject: [PATCH 18/24] feat(assume-role): resolve role ARN from nullplatform
 IAM provider by selector

The agent resolves the IAM role to assume from the "AWS IAM" provider
(category Identity & Access Control, spec aws-iam-configuration) declared in
nullplatform, matching its arns list by the "lambda" selector. Precedence:
ASSUME_ROLE_ARN env -> IAM provider -> scope-configurations assume_role.arn
-> ASSUME_ROLE_ARN_DEFAULT -> pod IRSA.

- assume_role_lib (new): pure arn_for_selector_from_json +
  provider_arn_for_selector (np provider list -> read, since list omits deep
  attributes). Mirrors the services-s3 mechanism.
- fetch_scope_configuration: insert the provider-by-selector lookup as
  priority 2, deriving the account NRN from the scope NRN (strip :namespace=).
- diagnose/build_context: same resolution before sourcing assume_role (it
  previously sourced assume_role without ever resolving an ARN).
- values.yaml: document the precedence and the ASSUME_ROLE_SELECTOR override.
- tests: BATS unit tests for both lib functions; np is stubbed with a
  stateless, argument-branching shell function (not a sequential mock).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/diagnose/build_context                 |  13 ++
 .../scope/tests/scripts/assume_role_lib.bats  | 116 ++++++++++++++++++
 lambda/utils/assume_role_lib                  |  43 +++++++
 lambda/utils/fetch_scope_configuration        |  36 +++++-
 lambda/values.yaml                            |  16 ++-
 5 files changed, 217 insertions(+), 7 deletions(-)
 create mode 100644 lambda/scope/tests/scripts/assume_role_lib.bats
 create mode 100644 lambda/utils/assume_role_lib

diff --git a/lambda/diagnose/build_context b/lambda/diagnose/build_context
index f234a67..d17e244 100755
--- a/lambda/diagnose/build_context
+++ b/lambda/diagnose/build_context
@@ -15,6 +15,19 @@ if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
 fi
 
 source "$SERVICE_PATH/utils/lambda_function_name"
+
+# Resolve the IAM role ARN to assume before any AWS call, mirroring
+# fetch_scope_configuration: env override -> "AWS IAM" provider matched by the
+# "lambda" selector (account nrn) -> ASSUME_ROLE_ARN_DEFAULT -> IRSA.
+source "$SERVICE_PATH/utils/assume_role_lib"
+ACCOUNT_NRN=$(echo "$SCOPE_NRN" | sed 's/:namespace=.*$//')
+ASSUME_ROLE_SELECTOR="${ASSUME_ROLE_SELECTOR:-lambda}"
+ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-}"
+if [ -z "$ASSUME_ROLE_ARN" ] && [ -n "$ACCOUNT_NRN" ] && [ -n "$ASSUME_ROLE_SELECTOR" ]; then
+  ASSUME_ROLE_ARN=$(provider_arn_for_selector "$ACCOUNT_NRN" "$ASSUME_ROLE_SELECTOR")
+fi
+export ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-${ASSUME_ROLE_ARN_DEFAULT:-}}"
+
 source "$SERVICE_PATH/utils/assume_role"
 
 lambda_info=$(aws lambda get-function --function-name "$LAMBDA_FUNCTION_NAME" --output json 2>/dev/null || echo "{}")
diff --git a/lambda/scope/tests/scripts/assume_role_lib.bats b/lambda/scope/tests/scripts/assume_role_lib.bats
new file mode 100644
index 0000000..8b7da30
--- /dev/null
+++ b/lambda/scope/tests/scripts/assume_role_lib.bats
@@ -0,0 +1,116 @@
+#!/usr/bin/env bats
+# Unit tests for the pure resolution functions in utils/assume_role_lib.
+#
+# arn_for_selector_from_json is pure jq — exercised directly.
+# provider_arn_for_selector orchestrates `np provider list` -> `np provider read`;
+# we stub np() branching on its arguments (stateless, so it survives the
+# command-substitution subshells the function uses) instead of a sequential mock.
+
+setup() {
+  TEST_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)"
+  HELPERS_DIR="$TEST_DIR/helpers"
+  LAMBDA_DIR="$(cd "$TEST_DIR/../../.." && pwd)"
+
+  load "$HELPERS_DIR/test_helper.bash"
+
+  # Stub np branching on args; FAKE_NP_MODE tweaks the `provider list` result.
+  np() {
+    local args="$*"
+    case "$args" in
+      *"provider list"*)
+        if [ "${FAKE_NP_MODE:-}" = "no_provider" ]; then
+          echo '{"results":[]}'
+        else
+          echo '{"results":[{"id":"prov-123"}]}'
+        fi
+        ;;
+      *"provider read"*)
+        echo '{"attributes":{"iam_role_arns":{"arns":[{"selector":"my-scope","arn":"arn:aws:iam::123456789012:role/test-lambda-role"}]}}}'
+        ;;
+      *) echo '{}' ;;
+    esac
+  }
+  export -f np
+
+  source "$LAMBDA_DIR/utils/assume_role_lib"
+}
+
+# --- arn_for_selector_from_json (pure) -------------------------------------
+
+JSON='{"attributes":{"iam_role_arns":{"arns":[{"selector":"s3","arn":"arn:aws:iam::111:role/s3"},{"selector":"lambda","arn":"arn:aws:iam::111:role/lambda"}]}}}'
+
+@test "arn_for_selector_from_json: matching selector returns its arn" {
+  run arn_for_selector_from_json "$JSON" lambda
+  assert_success
+  [ "$output" = "arn:aws:iam::111:role/lambda" ]
+}
+
+@test "arn_for_selector_from_json: unknown selector returns empty" {
+  run arn_for_selector_from_json "$JSON" ecs
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "arn_for_selector_from_json: missing arns key returns empty" {
+  run arn_for_selector_from_json '{"attributes":{}}' s3
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "arn_for_selector_from_json: empty input returns empty" {
+  run arn_for_selector_from_json '' s3
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "arn_for_selector_from_json: malformed json returns empty" {
+  run arn_for_selector_from_json 'not json' s3
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "arn_for_selector_from_json: empty selector returns empty" {
+  run arn_for_selector_from_json "$JSON" ''
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "arn_for_selector_from_json: duplicate selector takes first" {
+  local dup='{"attributes":{"iam_role_arns":{"arns":[{"selector":"s3","arn":"first"},{"selector":"s3","arn":"second"}]}}}'
+  run arn_for_selector_from_json "$dup" s3
+  assert_success
+  [ "$output" = "first" ]
+}
+
+# --- provider_arn_for_selector (np list -> read orchestration) -------------
+
+@test "provider_arn_for_selector: resolves arn for matching selector" {
+  run provider_arn_for_selector "organization=1:account=2" my-scope
+  assert_success
+  [ "$output" = "arn:aws:iam::123456789012:role/test-lambda-role" ]
+}
+
+@test "provider_arn_for_selector: no provider instance returns empty" {
+  export FAKE_NP_MODE=no_provider
+  run provider_arn_for_selector "organization=1:account=2" my-scope
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "provider_arn_for_selector: selector not in provider returns empty" {
+  run provider_arn_for_selector "organization=1:account=2" does-not-exist
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "provider_arn_for_selector: empty nrn returns empty" {
+  run provider_arn_for_selector "" my-scope
+  assert_success
+  [ -z "$output" ]
+}
+
+@test "provider_arn_for_selector: empty selector returns empty" {
+  run provider_arn_for_selector "organization=1:account=2" ""
+  assert_success
+  [ -z "$output" ]
+}
diff --git a/lambda/utils/assume_role_lib b/lambda/utils/assume_role_lib
new file mode 100644
index 0000000..5cd186c
--- /dev/null
+++ b/lambda/utils/assume_role_lib
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Sourceable library of PURE helpers for assume-role resolution.
+# Defines functions only — NO side effects on source, so it can be unit-tested
+# (see scope/tests/scripts/assume_role_lib.bats) and reused by the scripts that
+# resolve the role to assume (fetch_scope_configuration, diagnose/build_context).
+
+# arn_for_selector_from_json <provider_read_json> <selector>
+# Given the JSON returned by `np provider read --id <id> --format json` and a
+# selector string, echoes the matching IAM role ARN (first match wins), or an
+# empty string (always with status 0) on no match / missing or malformed input.
+arn_for_selector_from_json() {
+  local json="$1" selector="$2"
+  [ -n "$json" ] && [ -n "$selector" ] || return 0
+  # jq exits non-zero on unparsable JSON; `|| true` keeps that case a quiet miss.
+  printf '%s' "$json" | jq -r --arg sel "$selector" '
+    [ .attributes.iam_role_arns.arns[]?
+      | select(.selector == $sel)
+      | .arn ]
+    | first // ""' 2>/dev/null || true
+}
+
+# provider_arn_for_selector <nrn> <selector>
+# Looks up the "AWS IAM" provider (specification aws-iam-configuration, category
+# "Identity & Access Control") at <nrn>, reads it, and echoes the ARN matching
+# <selector>. Empty string if no provider / no match. Requires np + jq.
+# NOTE: `np provider list` does NOT return deep attributes, so we list to get the
+# provider id and then `np provider read --id` to obtain the arns (same two-step
+# pattern used for account.region resolution in fetch_scope_configuration).
+provider_arn_for_selector() {
+  local nrn="$1" selector="$2"
+  [ -n "$nrn" ] || return 0
+  [ -n "$selector" ] || return 0
+
+  local pid data
+  pid=$(np provider list --nrn "$nrn" \
+          --specification_slug aws-iam-configuration \
+          --format json --limit 100 2>/dev/null \
+        | jq -r '[ (.results // [])[] ] | first | .id // ""' 2>/dev/null)
+  [ -n "$pid" ] && [ "$pid" != "null" ] || return 0
+
+  data=$(np provider read --id "$pid" --format json 2>/dev/null)
+  arn_for_selector_from_json "$data" "$selector"
+}
diff --git a/lambda/utils/fetch_scope_configuration b/lambda/utils/fetch_scope_configuration
index f002d7f..9e467db 100755
--- a/lambda/utils/fetch_scope_configuration
+++ b/lambda/utils/fetch_scope_configuration
@@ -78,8 +78,40 @@ log debug "   ✅ placeholder_image_uri=${PLACEHOLDER_IMAGE_URI:-(not set, using
 NULL_AGENT_LAYER_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.agent.null_agent_layer_arn // empty')
 log debug "   ✅ null_agent_layer_arn=$NULL_AGENT_LAYER_ARN"
 
-# From scope-configurations category (optional — fallback to env var set in values.yaml)
-ASSUME_ROLE_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.assume_role.arn // empty')
+# --- Resolve the IAM role ARN to assume -------------------------------------
+# Precedence (highest to lowest):
+#   1. $ASSUME_ROLE_ARN already in the environment (explicit override).
+#   2. The "AWS IAM" provider (Identity & Access Control, spec
+#      aws-iam-configuration) at the ACCOUNT nrn, matched by selector. The
+#      selector defaults to "lambda"; override with the ASSUME_ROLE_SELECTOR
+#      env var (values.yaml / agent extra_envs) if the provider uses another key.
+#   3. scope-configurations provider key assume_role.arn (back-compat).
+#   4. $ASSUME_ROLE_ARN_DEFAULT env var (per-account default on the agent).
+#   5. None of the above -> empty -> use the pod's IRSA credentials directly.
+# shellcheck source=assume_role_lib
+source "$SERVICE_PATH/utils/assume_role_lib"
+
+# The IAM provider lives at the account level; derive the account nrn from the
+# scope nrn by stripping everything from :namespace= onward.
+ACCOUNT_NRN=$(echo "$NRN" | sed 's/:namespace=.*$//')
+ASSUME_ROLE_SELECTOR="${ASSUME_ROLE_SELECTOR:-lambda}"
+
+ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-}"
+
+# 2. nullplatform IAM provider, by selector.
+if [ -z "$ASSUME_ROLE_ARN" ] && [ -n "$ACCOUNT_NRN" ] && [ -n "$ASSUME_ROLE_SELECTOR" ]; then
+  ASSUME_ROLE_ARN=$(provider_arn_for_selector "$ACCOUNT_NRN" "$ASSUME_ROLE_SELECTOR")
+  if [ -n "$ASSUME_ROLE_ARN" ]; then
+    log debug "   ✅ assume_role_arn from IAM provider (selector=$ASSUME_ROLE_SELECTOR)"
+  fi
+fi
+
+# 3. scope-configurations provider key (back-compat).
+if [ -z "$ASSUME_ROLE_ARN" ]; then
+  ASSUME_ROLE_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.assume_role.arn // empty')
+fi
+
+# 4. Per-account default env var.
 ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-${ASSUME_ROLE_ARN_DEFAULT:-}}"
 log debug "   ✅ assume_role_arn=${ASSUME_ROLE_ARN:-(not set, using pod credentials)}"
 
diff --git a/lambda/values.yaml b/lambda/values.yaml
index 3efea6f..df1c754 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -53,11 +53,17 @@ configuration:
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.
   # Resolution precedence (see utils/fetch_scope_configuration):
-  #   scope-configurations provider key assume_role.arn
-  #     > ASSUME_ROLE_ARN_DEFAULT env var (set per-account on the agent)
-  # Account-specific, so it is NOT set here — provide it via the scope-config
-  # or the agent's extra_envs (Helm) for your installation. If unset, the
-  # agent's own pod credentials are used.
+  #   1. ASSUME_ROLE_ARN env var (explicit override)
+  #   2. "AWS IAM" provider (Identity & Access Control, spec aws-iam-configuration)
+  #      at the account NRN, matched by selector. The selector defaults to
+  #      "lambda"; override it with ASSUME_ROLE_SELECTOR if the provider's arns
+  #      list uses a different key.
+  #   3. scope-configurations provider key assume_role.arn (back-compat)
+  #   4. ASSUME_ROLE_ARN_DEFAULT env var (set per-account on the agent)
+  # All account-specific, so none are set here — provide them via the IAM
+  # provider, the scope-config, or the agent's extra_envs (Helm). If nothing
+  # resolves, the agent's own pod credentials (IRSA) are used.
+  # ASSUME_ROLE_SELECTOR: "lambda"
 
   # ── IAM ────────────────────────────────────────────────────────────────────
   IAM_PROPAGATION_WAIT_SECONDS: 20

From 4ff57c40c0877cf36c7b37f609e4b360526f30c2 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Mon, 8 Jun 2026 14:55:44 -0300
Subject: [PATCH 19/24] chore(values): set PLACEHOLDER_IMAGE_URI_DEFAULT for
 this installation

Commit the account's private ECR placeholder image as the default, overridable
per scope via the scope-config provider or per agent via Helm extra_envs.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/values.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lambda/values.yaml b/lambda/values.yaml
index df1c754..179dd6b 100644
--- a/lambda/values.yaml
+++ b/lambda/values.yaml
@@ -44,11 +44,11 @@ configuration:
   #   scope-configurations provider key deployment.placeholder_image_uri
   #     > PLACEHOLDER_IMAGE_URI_DEFAULT env var (set per-account on the agent)
   #       > the public default in scope/scripts/resolve_placeholder_image
-  # The URI is account-specific, so no real value is committed here. To choose
-  # your own placeholder image per installation, uncomment the line below and
-  # point it at your private ECR (or instead set it via the scope-config or the
-  # agent's extra_envs in Helm).
-  # PLACEHOLDER_IMAGE_URI_DEFAULT: "<account-id>.dkr.ecr.<region>.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest-amd64"
+  # Default placeholder image for this installation. Account-specific, but
+  # committed here intentionally. Can still be overridden per scope via the
+  # scope-config provider key deployment.placeholder_image_uri, or per agent
+  # via extra_envs in Helm.
+  PLACEHOLDER_IMAGE_URI_DEFAULT: "235494813897.dkr.ecr.us-east-1.amazonaws.com/aws-lambda/nullplatform-lambda-placeholder:latest"
 
   # ── Assume Role ────────────────────────────────────────────────────────────
   # IAM role ARN to assume before any AWS operation.

From 5109c0ef721f5128580bc8ba545658cb380e3fa8 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 9 Jun 2026 12:19:12 -0300
Subject: [PATCH 20/24] feat(workflows): assume IAM role via dedicated first
 step in every workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The assumed credentials were never reaching the steps that need them: assume
role ran inside build_context, but the workflow engine drops a step's exported
vars unless declared as output:environment — and no workflow declared the AWS
credentials. So tofu/aws steps ran with the pod's IRSA identity and failed on
permissions.

Fix: a dedicated `assume_role` step runs first in every AWS-touching workflow,
resolves the role and assumes it, and exports AWS_ACCESS_KEY_ID/SECRET/
SESSION_TOKEN as output:environment so all later steps inherit them.

- utils/assume_role_step (new): resolves NRN from CONTEXT, assumes, exports creds.
- utils/assume_role_lib: add resolve_assume_role_arn (env -> IAM provider by
  selector -> scope-config -> DEFAULT) and scope_config_assume_role_arn.
- fetch_scope_configuration, diagnose/build_context: remove the now-centralized
  assume-role resolution (single source of truth; avoids self-assume).
- 18 workflow yamls: prepend the assume_role step with the 3 credential outputs.
- assume_role_lib.bats: tests for the precedence chain.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/deployment/workflows/blue_green.yaml   | 10 ++++
 lambda/deployment/workflows/delete.yaml       | 10 ++++
 lambda/deployment/workflows/diagnose.yaml     | 10 ++++
 lambda/deployment/workflows/finalize.yaml     | 10 ++++
 lambda/deployment/workflows/initial.yaml      | 10 ++++
 lambda/deployment/workflows/rollback.yaml     | 10 ++++
 .../deployment/workflows/switch_traffic.yaml  | 10 ++++
 lambda/diagnose/build_context                 | 16 +-----
 lambda/instance/workflows/list.yaml           | 10 ++++
 lambda/log/workflows/log.yaml                 | 10 ++++
 lambda/metric/workflows/list.yaml             | 10 ++++
 lambda/metric/workflows/metric.yaml           | 10 ++++
 .../scope/tests/scripts/assume_role_lib.bats  | 56 +++++++++++++++++++
 .../adjust_provisioned_concurrency.yaml       | 10 ++++
 .../adjust_reserved_concurrency.yaml          | 10 ++++
 lambda/scope/workflows/create.yaml            | 10 ++++
 lambda/scope/workflows/delete.yaml            | 10 ++++
 lambda/scope/workflows/diagnose.yaml          | 10 ++++
 lambda/scope/workflows/invoke.yaml            | 10 ++++
 lambda/scope/workflows/update.yaml            | 10 ++++
 lambda/utils/assume_role_lib                  | 35 ++++++++++++
 lambda/utils/assume_role_step                 | 38 +++++++++++++
 lambda/utils/fetch_scope_configuration        | 44 ++-------------
 23 files changed, 316 insertions(+), 53 deletions(-)
 create mode 100755 lambda/utils/assume_role_step

diff --git a/lambda/deployment/workflows/blue_green.yaml b/lambda/deployment/workflows/blue_green.yaml
index a73b128..c0913e4 100644
--- a/lambda/deployment/workflows/blue_green.yaml
+++ b/lambda/deployment/workflows/blue_green.yaml
@@ -3,6 +3,16 @@ include:
 configuration:
   DEPLOYMENT_STRATEGY: "blue_green"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/deployment/workflows/delete.yaml b/lambda/deployment/workflows/delete.yaml
index 548c749..01b0933 100644
--- a/lambda/deployment/workflows/delete.yaml
+++ b/lambda/deployment/workflows/delete.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/deployment/workflows/diagnose.yaml b/lambda/deployment/workflows/diagnose.yaml
index e9425ea..10d6d39 100644
--- a/lambda/deployment/workflows/diagnose.yaml
+++ b/lambda/deployment/workflows/diagnose.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/diagnose/build_context"
diff --git a/lambda/deployment/workflows/finalize.yaml b/lambda/deployment/workflows/finalize.yaml
index 2c49db9..98e32e4 100644
--- a/lambda/deployment/workflows/finalize.yaml
+++ b/lambda/deployment/workflows/finalize.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/deployment/workflows/initial.yaml b/lambda/deployment/workflows/initial.yaml
index 0b4723d..580642a 100644
--- a/lambda/deployment/workflows/initial.yaml
+++ b/lambda/deployment/workflows/initial.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/deployment/workflows/rollback.yaml b/lambda/deployment/workflows/rollback.yaml
index 49537a4..59c75db 100644
--- a/lambda/deployment/workflows/rollback.yaml
+++ b/lambda/deployment/workflows/rollback.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/deployment/workflows/switch_traffic.yaml b/lambda/deployment/workflows/switch_traffic.yaml
index b00f893..95023c5 100644
--- a/lambda/deployment/workflows/switch_traffic.yaml
+++ b/lambda/deployment/workflows/switch_traffic.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/deployment/build_context"
diff --git a/lambda/diagnose/build_context b/lambda/diagnose/build_context
index d17e244..0120d32 100755
--- a/lambda/diagnose/build_context
+++ b/lambda/diagnose/build_context
@@ -16,19 +16,9 @@ fi
 
 source "$SERVICE_PATH/utils/lambda_function_name"
 
-# Resolve the IAM role ARN to assume before any AWS call, mirroring
-# fetch_scope_configuration: env override -> "AWS IAM" provider matched by the
-# "lambda" selector (account nrn) -> ASSUME_ROLE_ARN_DEFAULT -> IRSA.
-source "$SERVICE_PATH/utils/assume_role_lib"
-ACCOUNT_NRN=$(echo "$SCOPE_NRN" | sed 's/:namespace=.*$//')
-ASSUME_ROLE_SELECTOR="${ASSUME_ROLE_SELECTOR:-lambda}"
-ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-}"
-if [ -z "$ASSUME_ROLE_ARN" ] && [ -n "$ACCOUNT_NRN" ] && [ -n "$ASSUME_ROLE_SELECTOR" ]; then
-  ASSUME_ROLE_ARN=$(provider_arn_for_selector "$ACCOUNT_NRN" "$ASSUME_ROLE_SELECTOR")
-fi
-export ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-${ASSUME_ROLE_ARN_DEFAULT:-}}"
-
-source "$SERVICE_PATH/utils/assume_role"
+# NOTE: The IAM role is assumed by the dedicated `assume_role` step that runs
+# first in the workflow (see utils/assume_role_step); credentials are already in
+# the environment here.
 
 lambda_info=$(aws lambda get-function --function-name "$LAMBDA_FUNCTION_NAME" --output json 2>/dev/null || echo "{}")
 LAMBDA_FUNCTION_ARN=$(echo "$lambda_info" | jq -r '.Configuration.FunctionArn // ""')
diff --git a/lambda/instance/workflows/list.yaml b/lambda/instance/workflows/list.yaml
index ad29d14..0efef23 100644
--- a/lambda/instance/workflows/list.yaml
+++ b/lambda/instance/workflows/list.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/instance/build_context"
diff --git a/lambda/log/workflows/log.yaml b/lambda/log/workflows/log.yaml
index 391733d..bb48230 100644
--- a/lambda/log/workflows/log.yaml
+++ b/lambda/log/workflows/log.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/log/build_context"
diff --git a/lambda/metric/workflows/list.yaml b/lambda/metric/workflows/list.yaml
index ecdf27e..e8c2bf6 100644
--- a/lambda/metric/workflows/list.yaml
+++ b/lambda/metric/workflows/list.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: list_metrics
     type: script
     file: "$SERVICE_PATH/metric/list_metrics"
diff --git a/lambda/metric/workflows/metric.yaml b/lambda/metric/workflows/metric.yaml
index a3b3e61..725b76e 100644
--- a/lambda/metric/workflows/metric.yaml
+++ b/lambda/metric/workflows/metric.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/metric/build_context"
diff --git a/lambda/scope/tests/scripts/assume_role_lib.bats b/lambda/scope/tests/scripts/assume_role_lib.bats
index 8b7da30..2428799 100644
--- a/lambda/scope/tests/scripts/assume_role_lib.bats
+++ b/lambda/scope/tests/scripts/assume_role_lib.bats
@@ -114,3 +114,59 @@ JSON='{"attributes":{"iam_role_arns":{"arns":[{"selector":"s3","arn":"arn:aws:ia
   assert_success
   [ -z "$output" ]
 }
+
+# --- resolve_assume_role_arn (full precedence chain) -----------------------
+# Each test defines its own stateless np stub (branches on args) so it survives
+# the command-substitution subshells the resolver uses.
+
+@test "resolve_assume_role_arn: env override wins over everything" {
+  export ASSUME_ROLE_ARN="arn:env"
+  run resolve_assume_role_arn "organization=1:account=2" lambda
+  assert_success
+  [ "$output" = "arn:env" ]
+}
+
+@test "resolve_assume_role_arn: IAM provider when no env override" {
+  np() {
+    case "$*" in
+      *"--specification_slug aws-iam-configuration"*) echo '{"results":[{"id":"iam-1"}]}' ;;
+      *"provider read"*) echo '{"attributes":{"iam_role_arns":{"arns":[{"selector":"lambda","arn":"arn:provider:lambda"}]}}}' ;;
+      *) echo '{}' ;;
+    esac
+  }
+  export -f np
+  run resolve_assume_role_arn "organization=1:account=2" lambda
+  assert_success
+  [ "$output" = "arn:provider:lambda" ]
+}
+
+@test "resolve_assume_role_arn: scope-config fallback when provider misses" {
+  np() {
+    case "$*" in
+      *"--specification_slug aws-iam-configuration"*) echo '{"results":[]}' ;;
+      *"--categories scope-configurations"*) echo '{"results":[{"attributes":{"assume_role":{"arn":"arn:scopecfg:legacy"}}}]}' ;;
+      *) echo '{}' ;;
+    esac
+  }
+  export -f np
+  run resolve_assume_role_arn "organization=1:account=2" lambda
+  assert_success
+  [ "$output" = "arn:scopecfg:legacy" ]
+}
+
+@test "resolve_assume_role_arn: ASSUME_ROLE_ARN_DEFAULT when nothing else resolves" {
+  np() { echo '{"results":[]}'; }
+  export -f np
+  export ASSUME_ROLE_ARN_DEFAULT="arn:default"
+  run resolve_assume_role_arn "organization=1:account=2" lambda
+  assert_success
+  [ "$output" = "arn:default" ]
+}
+
+@test "resolve_assume_role_arn: empty (IRSA) when nothing resolves and no default" {
+  np() { echo '{"results":[]}'; }
+  export -f np
+  run resolve_assume_role_arn "organization=1:account=2" lambda
+  assert_success
+  [ -z "$output" ]
+}
diff --git a/lambda/scope/workflows/adjust_provisioned_concurrency.yaml b/lambda/scope/workflows/adjust_provisioned_concurrency.yaml
index 93b2a53..4728283 100644
--- a/lambda/scope/workflows/adjust_provisioned_concurrency.yaml
+++ b/lambda/scope/workflows/adjust_provisioned_concurrency.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/adjust_reserved_concurrency.yaml b/lambda/scope/workflows/adjust_reserved_concurrency.yaml
index 5fab200..3039133 100644
--- a/lambda/scope/workflows/adjust_reserved_concurrency.yaml
+++ b/lambda/scope/workflows/adjust_reserved_concurrency.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/create.yaml b/lambda/scope/workflows/create.yaml
index de08b9a..8ec86c3 100644
--- a/lambda/scope/workflows/create.yaml
+++ b/lambda/scope/workflows/create.yaml
@@ -3,6 +3,16 @@ include:
 configuration:
   TOFU_ACTION: "apply"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/delete.yaml b/lambda/scope/workflows/delete.yaml
index 2fb8faf..c5bdf87 100644
--- a/lambda/scope/workflows/delete.yaml
+++ b/lambda/scope/workflows/delete.yaml
@@ -3,6 +3,16 @@ include:
 configuration:
   TOFU_ACTION: "destroy"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/diagnose.yaml b/lambda/scope/workflows/diagnose.yaml
index dd990df..f084fa4 100644
--- a/lambda/scope/workflows/diagnose.yaml
+++ b/lambda/scope/workflows/diagnose.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/invoke.yaml b/lambda/scope/workflows/invoke.yaml
index 58af584..fb9a25a 100644
--- a/lambda/scope/workflows/invoke.yaml
+++ b/lambda/scope/workflows/invoke.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/scope/workflows/update.yaml b/lambda/scope/workflows/update.yaml
index f9a6bc4..9969bc8 100644
--- a/lambda/scope/workflows/update.yaml
+++ b/lambda/scope/workflows/update.yaml
@@ -1,6 +1,16 @@
 include:
   - "$SERVICE_PATH/values.yaml"
 steps:
+  - name: assume_role
+    type: script
+    file: "$SERVICE_PATH/utils/assume_role_step"
+    output:
+      - name: AWS_ACCESS_KEY_ID
+        type: environment
+      - name: AWS_SECRET_ACCESS_KEY
+        type: environment
+      - name: AWS_SESSION_TOKEN
+        type: environment
   - name: build_context
     type: script
     file: "$SERVICE_PATH/scope/build_context"
diff --git a/lambda/utils/assume_role_lib b/lambda/utils/assume_role_lib
index 5cd186c..7f2b503 100644
--- a/lambda/utils/assume_role_lib
+++ b/lambda/utils/assume_role_lib
@@ -41,3 +41,38 @@ provider_arn_for_selector() {
   data=$(np provider read --id "$pid" --format json 2>/dev/null)
   arn_for_selector_from_json "$data" "$selector"
 }
+
+# scope_config_assume_role_arn <nrn>
+# Back-compat fallback: reads the scope-configurations provider(s) at <nrn> and
+# echoes the first .attributes.assume_role.arn found, or empty string. This is
+# the legacy per-scope override that predates the IAM provider mechanism.
+scope_config_assume_role_arn() {
+  local nrn="$1"
+  [ -n "$nrn" ] || return 0  # no NRN: nothing to look up; succeed with no output
+  np provider list --nrn "$nrn" --categories scope-configurations \
+      --format json --limit 100 2>/dev/null \
+    | jq -r '[ (.results // [])[] | .attributes.assume_role.arn? // empty ] | first // ""' 2>/dev/null  # first ARN found, else ""
+}
+
+# resolve_assume_role_arn <account_nrn> <selector>
+# Full precedence chain, echoes the IAM role ARN to assume (empty = use IRSA):
+#   1. $ASSUME_ROLE_ARN env var (explicit override)
+#   2. "AWS IAM" provider (aws-iam-configuration) at <account_nrn> by <selector>
+#   3. scope-configurations provider key assume_role.arn (back-compat)
+#   4. $ASSUME_ROLE_ARN_DEFAULT env var (per-account agent default)
+resolve_assume_role_arn() {
+  local nrn="$1" selector="$2" arn=""
+
+  arn="${ASSUME_ROLE_ARN:-}"  # 1. explicit override wins
+
+  if [ -z "$arn" ] && [ -n "$nrn" ] && [ -n "$selector" ]; then
+    arn=$(provider_arn_for_selector "$nrn" "$selector")  # 2. IAM provider by selector
+  fi
+
+  if [ -z "$arn" ] && [ -n "$nrn" ]; then
+    arn=$(scope_config_assume_role_arn "$nrn")  # 3. legacy scope-configurations key
+  fi
+
+  arn="${arn:-${ASSUME_ROLE_ARN_DEFAULT:-}}"  # 4. per-account agent default; may stay empty
+  printf '%s' "$arn"  # no trailing newline; empty output means "use IRSA"
+}
diff --git a/lambda/utils/assume_role_step b/lambda/utils/assume_role_step
new file mode 100755
index 0000000..ccc86cc
--- /dev/null
+++ b/lambda/utils/assume_role_step
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Dedicated workflow step: resolve the target IAM role and assume it, exporting
+# temporary credentials so every subsequent step in the workflow inherits them.
+#
+# Runs FIRST in each AWS-touching workflow. The workflow YAML must declare
+# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN as
+# output:environment so the nullplatform engine propagates them downstream.
+#
+# Resolution precedence (see resolve_assume_role_arn in assume_role_lib):
+#   $ASSUME_ROLE_ARN env -> IAM provider by selector -> scope-config -> DEFAULT -> IRSA
+#
+# Requires: aws CLI, jq, np. Expects: CONTEXT, SERVICE_PATH.
+
+# Optional pretty logging (utils/assume_role falls back to echo if `log` absent).
+[ -f "$SERVICE_PATH/utils/log" ] && source "$SERVICE_PATH/utils/log"
+# shellcheck source=assume_role_lib
+source "$SERVICE_PATH/utils/assume_role_lib" || { echo "❌ assume_role step failed: cannot load $SERVICE_PATH/utils/assume_role_lib (refusing to fall back to pod credentials)" >&2; exit 1; }
+
+# Account NRN from CONTEXT (scope / service / generic event), stripping the
+# :namespace= segment and everything after it — the IAM provider is account-level.
+NRN=$(echo "${CONTEXT:-}" | jq -r '.scope.nrn // .service.nrn // .entity_nrn // ""' 2>/dev/null)
+ACCOUNT_NRN=$(echo "$NRN" | sed 's/:namespace=.*$//')
+ASSUME_ROLE_SELECTOR="${ASSUME_ROLE_SELECTOR:-lambda}"
+
+ASSUME_ROLE_ARN=$(resolve_assume_role_arn "$ACCOUNT_NRN" "$ASSUME_ROLE_SELECTOR")
+export ASSUME_ROLE_ARN
+
+# utils/assume_role performs sts:AssumeRole and exports AWS_* when an ARN is set,
+# or no-ops (leaving pod IRSA in place) when empty. It returns non-zero only when
+# sts:AssumeRole itself fails.
+if ! source "$SERVICE_PATH/utils/assume_role"; then
+  echo "❌ assume_role step failed: could not assume $ASSUME_ROLE_ARN" >&2
+  echo "💡 Possible causes:" >&2
+  echo "   - The agent's pod role is not allowed to sts:AssumeRole the target role" >&2
+  echo "   - The target role's trust policy does not trust the agent role" >&2
+  echo "   - The resolved ARN is wrong (check the IAM provider selector=$ASSUME_ROLE_SELECTOR)" >&2
+  exit 1
+fi
diff --git a/lambda/utils/fetch_scope_configuration b/lambda/utils/fetch_scope_configuration
index 9e467db..5ab0cc5 100755
--- a/lambda/utils/fetch_scope_configuration
+++ b/lambda/utils/fetch_scope_configuration
@@ -78,42 +78,10 @@ log debug "   ✅ placeholder_image_uri=${PLACEHOLDER_IMAGE_URI:-(not set, using
 NULL_AGENT_LAYER_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.agent.null_agent_layer_arn // empty')
 log debug "   ✅ null_agent_layer_arn=$NULL_AGENT_LAYER_ARN"
 
-# --- Resolve the IAM role ARN to assume -------------------------------------
-# Precedence (highest to lowest):
-#   1. $ASSUME_ROLE_ARN already in the environment (explicit override).
-#   2. The "AWS IAM" provider (Identity & Access Control, spec
-#      aws-iam-configuration) at the ACCOUNT nrn, matched by selector. The
-#      selector defaults to "lambda"; override with the ASSUME_ROLE_SELECTOR
-#      env var (values.yaml / agent extra_envs) if the provider uses another key.
-#   3. scope-configurations provider key assume_role.arn (back-compat).
-#   4. $ASSUME_ROLE_ARN_DEFAULT env var (per-account default on the agent).
-#   5. None of the above -> empty -> use the pod's IRSA credentials directly.
-# shellcheck source=assume_role_lib
-source "$SERVICE_PATH/utils/assume_role_lib"
-
-# The IAM provider lives at the account level; derive the account nrn from the
-# scope nrn by stripping everything from :namespace= onward.
-ACCOUNT_NRN=$(echo "$NRN" | sed 's/:namespace=.*$//')
-ASSUME_ROLE_SELECTOR="${ASSUME_ROLE_SELECTOR:-lambda}"
-
-ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-}"
-
-# 2. nullplatform IAM provider, by selector.
-if [ -z "$ASSUME_ROLE_ARN" ] && [ -n "$ACCOUNT_NRN" ] && [ -n "$ASSUME_ROLE_SELECTOR" ]; then
-  ASSUME_ROLE_ARN=$(provider_arn_for_selector "$ACCOUNT_NRN" "$ASSUME_ROLE_SELECTOR")
-  if [ -n "$ASSUME_ROLE_ARN" ]; then
-    log debug "   ✅ assume_role_arn from IAM provider (selector=$ASSUME_ROLE_SELECTOR)"
-  fi
-fi
-
-# 3. scope-configurations provider key (back-compat).
-if [ -z "$ASSUME_ROLE_ARN" ]; then
-  ASSUME_ROLE_ARN=$(echo "$SCOPE_CONFIG" | jq -r '.assume_role.arn // empty')
-fi
-
-# 4. Per-account default env var.
-ASSUME_ROLE_ARN="${ASSUME_ROLE_ARN:-${ASSUME_ROLE_ARN_DEFAULT:-}}"
-log debug "   ✅ assume_role_arn=${ASSUME_ROLE_ARN:-(not set, using pod credentials)}"
+# NOTE: The IAM role is assumed by the dedicated `assume_role` step that runs
+# first in each workflow (see utils/assume_role_step). When that step resolved a
+# role, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY/AWS_SESSION_TOKEN are already in
+# the environment here; otherwise pod IRSA applies. Nothing is resolved here anymore.
 
 export ALB_PUBLIC_LISTENER_ARN
 export ALB_PRIVATE_LISTENER_ARN
@@ -127,9 +95,5 @@ export HOSTED_PRIVATE_ZONE_ID
 export TOFU_STATE_BUCKET
 export PLACEHOLDER_IMAGE_URI
 export NULL_AGENT_LAYER_ARN
-export ASSUME_ROLE_ARN
-
-# Apply assume role immediately so all subsequent AWS calls run under the target role
-source "$SERVICE_PATH/utils/assume_role"
 
 log info "✨ Scope configuration fetched successfully"

From 23e25154a706e45f6101b23ff5561f7555e97123 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 9 Jun 2026 16:36:27 -0300
Subject: [PATCH 21/24] feat(iam): make Lambda execution-role prefix
 configurable

The execution role name was hardcoded as np-lambda-<function>-role. Resolve the
prefix via get_config_value (scope-config provider lambda.execution_role_prefix
> LAMBDA_EXECUTION_ROLE_PREFIX env > default "np-lambda-"), keeping the previous
name as the default so existing scopes are unaffected.

Warn (non-blocking) when the prefix falls outside the assume role's IAM policy
constraint (np-lambda-* / nullplatform-*), since CreateRole/PassRole would
otherwise be denied unless that policy is widened.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/scope/tofu/iam/setup | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/lambda/scope/tofu/iam/setup b/lambda/scope/tofu/iam/setup
index 09fa234..a113d04 100755
--- a/lambda/scope/tofu/iam/setup
+++ b/lambda/scope/tofu/iam/setup
@@ -1,14 +1,30 @@
 #!/bin/bash
 
 source "$SERVICE_PATH/utils/log"
+source "$SERVICE_PATH/utils/get_config_value"
 
 log info "🔍 Configuring IAM role for deployment..."
 
-# Prefix with "np-lambda-" so the role name matches the iam:CreateRole/PassRole
-# Resource constraint in lambda/setup (arn:aws:iam::*:role/np-lambda-*).
-iam_role_name="np-lambda-${LAMBDA_FUNCTION_NAME}-role"
+# Execution-role name = <prefix><function>-role. The prefix is configurable
+# (scope-config provider > LAMBDA_EXECUTION_ROLE_PREFIX env > default), but it
+# MUST keep matching the iam:CreateRole/PassRole Resource constraint of the
+# assume role's policy in lambda/setup (arn:aws:iam::*:role/np-lambda-* or
+# .../nullplatform-*); otherwise CreateRole/PassRole are denied. The default
+# preserves the historical "np-lambda-".
+exec_role_prefix=$(get_config_value \
+  --provider '.providers["scope-configurations"].lambda.execution_role_prefix' \
+  --env LAMBDA_EXECUTION_ROLE_PREFIX \
+  --default 'np-lambda-')
+
+iam_role_name="${exec_role_prefix}${LAMBDA_FUNCTION_NAME}-role"
 iam_role_name="${iam_role_name:0:64}"
 
+# Warn (don't block) if the prefix falls outside the policy's allowed prefixes.
+case "$exec_role_prefix" in
+  np-lambda-*|nullplatform-*) : ;;
+  *) log warn "   ⚠️  execution_role_prefix='$exec_role_prefix' is outside the assume role's IAM policy constraint (np-lambda-* / nullplatform-*); CreateRole/PassRole may be denied unless that policy is updated" ;;
+esac
+
 log debug "   📋 role_name=$iam_role_name"
 
 role_output=$(aws iam get-role --role-name "$iam_role_name" 2>&1)

From 0b684760c3f8623c6ab6bada94bef4939efce99a Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 9 Jun 2026 16:40:21 -0300
Subject: [PATCH 22/24] chore(do_tofu): remove the stderr-redirect explanation
 comment

Drop the comment block above the tofu run; the 2>&1 redirect itself is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/scope/tofu/do_tofu | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lambda/scope/tofu/do_tofu b/lambda/scope/tofu/do_tofu
index bb90822..e6d94e5 100755
--- a/lambda/scope/tofu/do_tofu
+++ b/lambda/scope/tofu/do_tofu
@@ -176,10 +176,6 @@ if [ "$TOFU_ACTION" = "apply" ]; then
 
 fi
 
-# Run tofu action
-# Redirect stderr to stdout: OpenTofu writes its "Error:" block to stderr, and the
-# NP workflow executor only captures stdout — without this, the actual failure
-# reason (e.g. an AWS AccessDenied) never reaches the NP logs.
 log info "📝 Running tofu $TOFU_ACTION..."
 tofu_exit_code=0
 tofu -chdir="$TF_WORKING_DIR" "$TOFU_ACTION" -auto-approve -var-file="$TOFU_VAR_FILE" 2>&1 || tofu_exit_code=$?

From 51836a5c90f84e0f24e4cfaa803f2440de23c487 Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Tue, 9 Jun 2026 19:01:35 -0300
Subject: [PATCH 23/24] refactor: move install tofu module from lambda/setup to
 lambda/specs/tofu

Relocate the standalone install module (IAM role+policies and the NP
scope_definition registration) so the .tf lives next to the .json.tpl specs it
consumes. No behavior change; still a standalone root module.

- git mv lambda/setup -> lambda/specs/tofu (history preserved)
- backend.tf key: lambda/setup/... -> lambda/specs/tofu/...
- installation.md / prerequisites.md: update the paths

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/installation.md                                | 2 +-
 lambda/prerequisites.md                               | 4 ++--
 lambda/setup/backend.tf                               | 5 -----
 lambda/specs/tofu/backend.tf                          | 5 +++++
 lambda/{setup => specs/tofu}/main.tf                  | 0
 lambda/{setup => specs/tofu}/outputs.tf               | 0
 lambda/{setup => specs/tofu}/provider.tf              | 0
 lambda/{setup => specs/tofu}/requirements.tf          | 0
 lambda/{setup => specs/tofu}/terraform.tfvars.example | 0
 lambda/{setup => specs/tofu}/variables.tf             | 0
 10 files changed, 8 insertions(+), 8 deletions(-)
 delete mode 100644 lambda/setup/backend.tf
 create mode 100644 lambda/specs/tofu/backend.tf
 rename lambda/{setup => specs/tofu}/main.tf (100%)
 rename lambda/{setup => specs/tofu}/outputs.tf (100%)
 rename lambda/{setup => specs/tofu}/provider.tf (100%)
 rename lambda/{setup => specs/tofu}/requirements.tf (100%)
 rename lambda/{setup => specs/tofu}/terraform.tfvars.example (100%)
 rename lambda/{setup => specs/tofu}/variables.tf (100%)

diff --git a/lambda/installation.md b/lambda/installation.md
index 25dd240..f34eb07 100644
--- a/lambda/installation.md
+++ b/lambda/installation.md
@@ -29,7 +29,7 @@ git clone https://github.com/nullplatform/tofu-modules /root/.np/nullplatform/to
 ### 2. Configure variables
 
 ```bash
-cd lambda/setup
+cd lambda/specs/tofu
 cp terraform.tfvars.example terraform.tfvars
 ```
 
diff --git a/lambda/prerequisites.md b/lambda/prerequisites.md
index 8663155..c569754 100644
--- a/lambda/prerequisites.md
+++ b/lambda/prerequisites.md
@@ -229,7 +229,7 @@ Agents run in a Kubernetes pod and authenticate to AWS via a **Service Account**
 The IAM policies above let the agent CREATE Lambda functions and target
 groups, but the `create-scope` workflow ALSO depends on three runtime
 artifacts that must exist BEFORE the first scope is created. None are
-auto-created by the bundled `setup/main.tf` today — the operator
+auto-created by the bundled `specs/tofu/main.tf` today — the operator
 must provision them.
 
 ### 1. Placeholder image (private ECR)
@@ -383,7 +383,7 @@ This applies to **every** ECR repository that ever stores a Lambda
 image:
 
 1. The placeholder ECR (created during installation, addressed by
-   `lambda/setup/main.tf` if you use the bundled module — the policy is
+   `lambda/specs/tofu/main.tf` if you use the bundled module — the policy is
    already applied there).
 2. **The per-application ECR repositories** that `np asset push`
    creates dynamically when each app does its first build, named
diff --git a/lambda/setup/backend.tf b/lambda/setup/backend.tf
deleted file mode 100644
index a63cc72..0000000
--- a/lambda/setup/backend.tf
+++ /dev/null
@@ -1,5 +0,0 @@
-terraform {
-  backend "s3" {
-    key = "lambda/setup/terraform.tfstate"
-  }
-}
diff --git a/lambda/specs/tofu/backend.tf b/lambda/specs/tofu/backend.tf
new file mode 100644
index 0000000..cc6acba
--- /dev/null
+++ b/lambda/specs/tofu/backend.tf
@@ -0,0 +1,5 @@
+terraform {
+  backend "s3" {
+    key = "lambda/specs/tofu/terraform.tfstate"
+  }
+}
diff --git a/lambda/setup/main.tf b/lambda/specs/tofu/main.tf
similarity index 100%
rename from lambda/setup/main.tf
rename to lambda/specs/tofu/main.tf
diff --git a/lambda/setup/outputs.tf b/lambda/specs/tofu/outputs.tf
similarity index 100%
rename from lambda/setup/outputs.tf
rename to lambda/specs/tofu/outputs.tf
diff --git a/lambda/setup/provider.tf b/lambda/specs/tofu/provider.tf
similarity index 100%
rename from lambda/setup/provider.tf
rename to lambda/specs/tofu/provider.tf
diff --git a/lambda/setup/requirements.tf b/lambda/specs/tofu/requirements.tf
similarity index 100%
rename from lambda/setup/requirements.tf
rename to lambda/specs/tofu/requirements.tf
diff --git a/lambda/setup/terraform.tfvars.example b/lambda/specs/tofu/terraform.tfvars.example
similarity index 100%
rename from lambda/setup/terraform.tfvars.example
rename to lambda/specs/tofu/terraform.tfvars.example
diff --git a/lambda/setup/variables.tf b/lambda/specs/tofu/variables.tf
similarity index 100%
rename from lambda/setup/variables.tf
rename to lambda/specs/tofu/variables.tf

From 6752beefd4ebc313c936efd46c8d3e027b4022fe Mon Sep 17 00:00:00 2001
From: David Fernandez <david.fermnandez@nullplatform.com>
Date: Wed, 10 Jun 2026 12:59:05 -0300
Subject: [PATCH 24/24] fix(specs/tofu): bump aws provider constraint to ~>
 6.47.0

The specs/tofu module now also creates the Lambda IAM requirements
(aws resources), so its provider pin must be compatible with consumers
running the AWS provider 6.x line (EKS/agent stack). ~> 5.0 made the
provider graph unresolvable when composed with those modules.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lambda/specs/tofu/provider.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lambda/specs/tofu/provider.tf b/lambda/specs/tofu/provider.tf
index 4fa3661..5ea18b8 100644
--- a/lambda/specs/tofu/provider.tf
+++ b/lambda/specs/tofu/provider.tf
@@ -18,7 +18,7 @@ terraform {
     }
     aws = {
       source  = "hashicorp/aws"
-      version = "~> 5.0"
+      version = "~> 6.47.0"
     }
   }
 }