From 93ea3b3eaf38e28e729afdfc7688d0353780e788 Mon Sep 17 00:00:00 2001 From: Dustin Smith Date: Sun, 8 Mar 2026 20:43:29 +0700 Subject: [PATCH] fix: add missing DynamoDB env vars and IAM for sla-monitor The sla-monitor Lambda reads CONTROL_TABLE, JOBLOG_TABLE, and RERUN_TABLE to suppress alerts for completed/failed pipelines, but these env vars were never configured in Terraform. The new ValidateEnv startup check surfaced this pre-existing gap. Also removes STATE_MACHINE_ARN from orchestrator's required env vars since the orchestrator never starts SFN executions (only stream-router and watchdog do). --- NOTE(review): RERUN_TABLE is added to the sla-monitor environment below, but the new dynamodb_sla_read policy lists only the control and joblog table ARNs; confirm sla-monitor never issues requests against the rerun table. NOTE(review): the message above says watchdog starts SFN executions, yet the watchdog entry in requiredEnvVars does not list STATE_MACHINE_ARN; confirm whether that is intentional. deploy/terraform/lambda.tf | 27 +++++++++++++++++++++++++++ internal/lambda/envcheck.go | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/deploy/terraform/lambda.tf b/deploy/terraform/lambda.tf index 4635e04..c8d1e64 100644 --- a/deploy/terraform/lambda.tf +++ b/deploy/terraform/lambda.tf @@ -192,6 +192,9 @@ resource "aws_lambda_function" "sla_monitor" { environment { variables = { + CONTROL_TABLE = aws_dynamodb_table.control.name + JOBLOG_TABLE = aws_dynamodb_table.joblog.name + RERUN_TABLE = aws_dynamodb_table.rerun.name EVENT_BUS_NAME = aws_cloudwatch_event_bus.interlock.name SLA_MONITOR_ARN = "arn:aws:lambda:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:function:${var.environment}-interlock-sla-monitor" SCHEDULER_ROLE_ARN = aws_iam_role.scheduler_sla.arn @@ -304,6 +307,30 @@ resource "aws_iam_role_policy" "dynamodb_watchdog" { policy = data.aws_iam_policy_document.dynamodb_watchdog.json } +# ----------------------------------------------------------------------------- +# DynamoDB read — sla-monitor (control + joblog, read-only for alert suppression) +# ----------------------------------------------------------------------------- + +data "aws_iam_policy_document" "dynamodb_sla_read" { + statement { + sid = "ReadControlJoblog" + actions = [ + "dynamodb:GetItem", + "dynamodb:Query", + ] + resources = [ + 
aws_dynamodb_table.control.arn, + aws_dynamodb_table.joblog.arn, + ] + } +} + +resource "aws_iam_role_policy" "dynamodb_sla_read" { + name = "dynamodb-read" + role = aws_iam_role.lambda["sla-monitor"].id + policy = data.aws_iam_policy_document.dynamodb_sla_read.json +} + # ----------------------------------------------------------------------------- # EventBridge PutEvents — all 4 functions # ----------------------------------------------------------------------------- diff --git a/internal/lambda/envcheck.go b/internal/lambda/envcheck.go index 70ae426..0b715b2 100644 --- a/internal/lambda/envcheck.go +++ b/internal/lambda/envcheck.go @@ -10,7 +10,7 @@ import ( // it requires at startup. Missing vars cause a fail-fast with a clear message. var requiredEnvVars = map[string][]string{ "stream-router": {"CONTROL_TABLE", "JOBLOG_TABLE", "RERUN_TABLE", "STATE_MACHINE_ARN", "EVENT_BUS_NAME"}, - "orchestrator": {"CONTROL_TABLE", "JOBLOG_TABLE", "RERUN_TABLE", "STATE_MACHINE_ARN", "EVENT_BUS_NAME"}, + "orchestrator": {"CONTROL_TABLE", "JOBLOG_TABLE", "RERUN_TABLE", "EVENT_BUS_NAME"}, "watchdog": {"CONTROL_TABLE", "JOBLOG_TABLE", "RERUN_TABLE", "EVENT_BUS_NAME"}, "sla-monitor": {"CONTROL_TABLE", "JOBLOG_TABLE", "RERUN_TABLE", "EVENT_BUS_NAME", "SLA_MONITOR_ARN", "SCHEDULER_ROLE_ARN", "SCHEDULER_GROUP_NAME"}, "event-sink": {"EVENTS_TABLE"},