From 853ab90498f664eb8dae0d5583b8386093f3f5c6 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 13:39:06 +0900 Subject: [PATCH 01/10] =?UTF-8?q?[OT-14]=20[CHORE]:=20deploy-ai=20AWS=20?= =?UTF-8?q?=EC=9D=B8=EC=A6=9D=EC=9D=84=20OIDC=20role=20assume=20=EB=B0=A9?= =?UTF-8?q?=EC=8B=9D=EC=9C=BC=EB=A1=9C=20=EC=A0=84=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-ai.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy-ai.yml b/.github/workflows/deploy-ai.yml index e4d0755d..05e0658e 100644 --- a/.github/workflows/deploy-ai.yml +++ b/.github/workflows/deploy-ai.yml @@ -11,6 +11,10 @@ on: types: - closed +permissions: + id-token: write + contents: read + env: AWS_REGION: ap-northeast-2 SERVICE_NAME: machine @@ -28,8 +32,7 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - name: Login to ECR @@ -73,8 +76,7 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - name: Deploy AI service to EC2 via SSM From cb4d1e72d13ba9b4cd789444953c24105b384ebb Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 13:39:16 +0900 Subject: [PATCH 02/10] =?UTF-8?q?[OT-14]=20[FEAT]:=20deploy-ec2-docker=20?= =?UTF-8?q?=EC=9D=B8=ED=94=84=EB=9D=BC=20=ED=8C=8C=EB=9D=BC=EB=AF=B8?= =?UTF-8?q?=ED=84=B0=C2=B7DB=20=EB=B9=84=EB=B0=80=20=EC=A1=B0=ED=9A=8C=20?= =?UTF-8?q?=EA=B8=B0=EB=B0=98=20=EB=B0=B0=ED=8F=AC=EB=A1=9C=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-ec2-docker.yml | 66 +++++++++++++++++-------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/.github/workflows/deploy-ec2-docker.yml b/.github/workflows/deploy-ec2-docker.yml index 160c12ae..2b07d2e6 100644 --- a/.github/workflows/deploy-ec2-docker.yml +++ b/.github/workflows/deploy-ec2-docker.yml @@ -11,6 +11,10 @@ on: types: - closed +permissions: + id-token: write + contents: read + env: AWS_REGION: ap-northeast-2 @@ -36,8 +40,7 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - name: Login to ECR @@ -55,6 +58,7 @@ jobs: run: | IMAGE_TAG="${IMAGE_TAG_INPUT:-${GITHUB_SHA}}" IMAGE_URI="${ECR_REGISTRY}/${{ matrix.ecr_repo }}:${IMAGE_TAG}" + IMAGE_URI_LATEST="${ECR_REGISTRY}/${{ matrix.ecr_repo }}:latest" docker build \ @@ -75,8 +79,7 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - name: Deploy to EC2 instances via SSM @@ -85,28 +88,49 @@ jobs: IMAGE_TAG_INPUT: ${{ github.event.inputs.image_tag }} PROJECT_NAME: oplust DB_NAME: oplust - SSM_RDS_ENDPOINT_PARAM: /oplust/common/rds-endpoint - SSM_DB_USERNAME_PARAM: /oplust/common/db-username - SSM_DB_PASSWORD_PARAM: /oplust/common/db-password + RDS_INSTANCE_IDENTIFIER: oplust-dev-db NODE_EXPORTER_TARGET_SSM_PARAM: /oplust/monitoring/targets/node-exporter - ECS_CLUSTER_NAME: fluffy-flamingo-5ag1uq - ECS_SERVICE_NAME: oplust-transcoder-service + SSM_ECS_CLUSTER_NAME_PARAM: /oplust/dev/lambda/worker/ecs_cluster_name + SSM_ECS_SERVICE_NAME_PARAM: /oplust/dev/lambda/worker/ecs_service_name TRANSCODER_REPOSITORY: oplust-transcoder TRANSCODER_CONTAINER_NAME: oplust-transcoder - SCALER_LAMBDA_NAME: oplust-transcoder-scaler + ENV_NAME: dev run: | set -euo pipefail IMAGE_TAG="${IMAGE_TAG_INPUT:-${GITHUB_SHA}}" + ECS_CLUSTER_NAME=$(aws ssm get-parameter --region "$AWS_REGION" --name "$SSM_ECS_CLUSTER_NAME_PARAM" --with-decryption --query 'Parameter.Value' --output text) + ECS_SERVICE_NAME=$(aws ssm get-parameter --region "$AWS_REGION" --name "$SSM_ECS_SERVICE_NAME_PARAM" --with-decryption --query 'Parameter.Value' --output text) + SCALER_LAMBDA_NAME="${PROJECT_NAME}-${ENV_NAME}-worker" + + if [ -z "$ECS_CLUSTER_NAME" ] || [ "$ECS_CLUSTER_NAME" = "None" ] || [ -z "$ECS_SERVICE_NAME" ] || [ "$ECS_SERVICE_NAME" = "None" ]; then + echo "Failed to resolve ECS cluster/service from SSM parameters" >&2 + exit 1 + fi + + DB_HOST=$(aws rds describe-db-instances --region "$AWS_REGION" --db-instance-identifier "$RDS_INSTANCE_IDENTIFIER" --query 'DBInstances[0].Endpoint.Address' --output text) + DB_SECRET_ARN=$(aws rds describe-db-instances --region "$AWS_REGION" --db-instance-identifier "$RDS_INSTANCE_IDENTIFIER" --query 'DBInstances[0].MasterUserSecret.SecretArn' --output text) + DB_SECRET_JSON=$(aws secretsmanager get-secret-value --region "$AWS_REGION" --secret-id "$DB_SECRET_ARN" --query 'SecretString' --output text) + DB_USER=$(echo "$DB_SECRET_JSON" | jq -r '.username') + DB_PASS=$(echo "$DB_SECRET_JSON" | jq -r '.password') + + if [ -z "$DB_HOST" ] || [ "$DB_HOST" = "None" ] || [ -z "$DB_USER" ] || [ "$DB_USER" = "null" ] || [ -z "$DB_PASS" ] || [ "$DB_PASS" = "null" ]; then + echo "Failed to resolve DB connection values from RDS/SecretsManager" >&2 + exit 1 + fi + + DB_USER_B64=$(printf '%s' "$DB_USER" | base64 | tr -d '\n') + DB_PASS_B64=$(printf '%s' "$DB_PASS" | base64 | tr -d '\n') + MONITORING_PRIVATE_IP=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=${PROJECT_NAME}-monitoring-ec2" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].PrivateIpAddress" \ --output text) if [ -z "$MONITORING_PRIVATE_IP" ] || [ "$MONITORING_PRIVATE_IP" = "None" ]; then - echo "No running monitoring instance found for tag: ${PROJECT_NAME}-monitoring-ec2" >&2 + echo "No running monitoring instance found for tag: monitoring" >&2 exit 1 fi @@ -187,13 +211,10 @@ jobs: commands=( "set -e" "sudo mkdir -p /etc/oplust" - "DB_HOST=\$(aws ssm get-parameter --region $AWS_REGION --name '$SSM_RDS_ENDPOINT_PARAM' --with-decryption --query 'Parameter.Value' --output text)" - "DB_USER=\$(aws ssm get-parameter --region $AWS_REGION --name '$SSM_DB_USERNAME_PARAM' --with-decryption --query 'Parameter.Value' --output text)" - "DB_PASS=\$(aws ssm get-parameter --region $AWS_REGION --name '$SSM_DB_PASSWORD_PARAM' --with-decryption --query 'Parameter.Value' --output text)" "SERVICE_ENV=\$(aws ssm get-parameter --region $AWS_REGION --name '${service_env_param}' --with-decryption --query 'Parameter.Value' --output text)" - "echo \"SPRING_DATASOURCE_URL=jdbc:mysql://\$DB_HOST:3306/${DB_NAME}\" | sudo tee ${env_file} >/dev/null" - "echo \"SPRING_DATASOURCE_USERNAME=\$DB_USER\" | sudo tee -a ${env_file} >/dev/null" - "echo \"SPRING_DATASOURCE_PASSWORD=\$DB_PASS\" | sudo tee -a ${env_file} >/dev/null" + "echo \"SPRING_DATASOURCE_URL=jdbc:mysql://${DB_HOST}:3306/${DB_NAME}\" | sudo tee ${env_file} >/dev/null" + "echo \"SPRING_DATASOURCE_USERNAME=\$(echo '${DB_USER_B64}' | base64 -d)\" | sudo tee -a ${env_file} >/dev/null" + "echo \"SPRING_DATASOURCE_PASSWORD=\$(echo '${DB_PASS_B64}' | base64 -d)\" | sudo tee -a ${env_file} >/dev/null" "printf '%s\n' \"\$SERVICE_ENV\" | sudo tee -a ${env_file} >/dev/null" "sudo chmod 600 ${env_file}" "aws ecr get-login-password --region $AWS_REGION | sudo docker login --username AWS --password-stdin $ECR_REGISTRY" @@ -469,6 +490,11 @@ jobs: rm -f "$td_file" "$new_td_file" } - deploy_service "${PROJECT_NAME}-user-ec2" "${ECR_REGISTRY}/oplust-api-user:${IMAGE_TAG}" "oplust-api-user" "/etc/oplust/api-user.env" "8080" "/oplust/api-user/env" "user-api" - deploy_service "${PROJECT_NAME}-admin-ec2" "${ECR_REGISTRY}/oplust-api-admin:${IMAGE_TAG}" "oplust-api-admin" "/etc/oplust/api-admin.env" "8081" "/oplust/api-admin/env" "admin-api" + deploy_service "user" "${ECR_REGISTRY}/oplust-api-user:${IMAGE_TAG}" "oplust-api-user" "/etc/oplust/api-user.env" "8080" "/oplust/${ENV_NAME}/api-user/env" "user-api" + deploy_service "admin" "${ECR_REGISTRY}/oplust-api-admin:${IMAGE_TAG}" "oplust-api-admin" "/etc/oplust/api-admin.env" "8081" "/oplust/${ENV_NAME}/api-admin/env" "admin-api" deploy_transcoder_ecs + + + + + From 97608ad02aab78ef29a6f60ffe63422897893745 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 13:39:26 +0900 Subject: [PATCH 03/10] =?UTF-8?q?[OT-14]=20[FEAT]:=20monitoring=20?= =?UTF-8?q?=EB=B0=B0=ED=8F=AC=20=ED=83=80=EA=B9=83=20=EC=9E=90=EB=8F=99=20?= =?UTF-8?q?=ED=95=B4=EC=84=9D=20=EB=B0=8F=20Grafana=20Secret=20Manager=20?= =?UTF-8?q?=EC=97=B0=EB=8F=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-monitoring.yml | 175 ++++++++++-------------- 1 file changed, 69 insertions(+), 106 deletions(-) diff --git a/.github/workflows/deploy-monitoring.yml b/.github/workflows/deploy-monitoring.yml index 20d00c72..727bd084 100644 --- a/.github/workflows/deploy-monitoring.yml +++ b/.github/workflows/deploy-monitoring.yml @@ -6,43 +6,28 @@ on: monitoring_instance_tag: description: "EC2 Name tag for monitoring server" required: true - default: "oplust-monitoring-ec2" + default: "monitoring" type: string - user_api_target_ssm_param: - description: "SSM parameter name for user-api target (host:port)" + project_name: + description: "Project name" required: true - default: "/oplust/monitoring/targets/user-api" + default: "oplust" type: string - admin_api_target_ssm_param: - description: "SSM parameter name for admin-api target (host:port)" + environment: + description: "Environment name" required: true - default: "/oplust/monitoring/targets/admin-api" + default: "dev" type: string - transcoder_target_ssm_param: - description: "SSM parameter name for transcoder target (host:port)" + grafana_admin_secret_id: + description: "Secrets Manager secret id/arn for Grafana admin credentials" required: true - default: "/oplust/monitoring/targets/transcoder" - type: string - node_exporter_target_ssm_param: - description: "SSM parameter name for node-exporter target (host:port)" - required: true - default: "/oplust/monitoring/targets/node-exporter" - type: string - machine_node_exporter_target_ssm_param: - description: "SSM parameter name for machine node-exporter target (host:port)" - required: true - default: "/oplust/monitoring/targets/machine-node-exporter" - type: string - grafana_password_ssm_param: - description: "SSM SecureString parameter name for Grafana admin password" - required: true - default: "/oplust/monitoring/grafana-admin-password" - type: string - grafana_admin_password: - description: "Optional override password (leave blank to use SSM)" - required: false + default: "oplust/dev/monitoring/grafana-admin-credentials" type: string +permissions: + id-token: write + contents: read + env: AWS_REGION: ap-northeast-2 MONITORING_ROOT: /opt/oplust-monitoring @@ -58,82 +43,66 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - - name: Resolve scrape targets from SSM + - name: Resolve targets from Terraform naming convention env: - USER_API_TARGET_SSM_PARAM: ${{ github.event.inputs.user_api_target_ssm_param }} - ADMIN_API_TARGET_SSM_PARAM: ${{ github.event.inputs.admin_api_target_ssm_param }} - TRANSCODER_TARGET_SSM_PARAM: ${{ github.event.inputs.transcoder_target_ssm_param }} - NODE_EXPORTER_TARGET_SSM_PARAM: ${{ github.event.inputs.node_exporter_target_ssm_param }} - MACHINE_NODE_EXPORTER_TARGET_SSM_PARAM: ${{ github.event.inputs.machine_node_exporter_target_ssm_param }} + PROJECT_NAME: ${{ github.event.inputs.project_name }} + ENV_NAME: ${{ github.event.inputs.environment }} + INSTANCE_TAG: ${{ github.event.inputs.monitoring_instance_tag }} run: | set -euo pipefail - USER_API_TARGET=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$USER_API_TARGET_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) + get_private_ip_by_name() { + local name_tag="$1" + aws ec2 describe-instances \ + --region "$AWS_REGION" \ + --filters "Name=tag:Name,Values=${name_tag}" "Name=instance-state-name,Values=running" \ + --query "Reservations[0].Instances[0].PrivateIpAddress" \ + --output text + } - ADMIN_API_TARGET=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$ADMIN_API_TARGET_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) + USER_API_IP=$(get_private_ip_by_name "user") + ADMIN_API_IP=$(get_private_ip_by_name "admin") + MONITORING_IP=$(get_private_ip_by_name "${INSTANCE_TAG}") - TRANSCODER_TARGET=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$TRANSCODER_TARGET_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) + if [ -z "$USER_API_IP" ] || [ "$USER_API_IP" = "None" ] || [ -z "$ADMIN_API_IP" ] || [ "$ADMIN_API_IP" = "None" ] || [ -z "$MONITORING_IP" ] || [ "$MONITORING_IP" = "None" ]; then + echo "Failed to resolve required EC2 private IPs (user/admin/monitoring)." >&2 + exit 1 + fi - NODE_EXPORTER_TARGET=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$NODE_EXPORTER_TARGET_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) + USER_API_TARGET="${USER_API_IP}:8080" + ADMIN_API_TARGET="${ADMIN_API_IP}:8081" + MACHINE_NODE_EXPORTER_TARGET="${MONITORING_IP}:9100" - MACHINE_NODE_EXPORTER_TARGET=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$MACHINE_NODE_EXPORTER_TARGET_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) + NODE_EXPORTER_TARGETS_YAML="\"${USER_API_IP}:9100\",\"${ADMIN_API_IP}:9100\",\"${MONITORING_IP}:9100\"" + + ECS_CLUSTER_PARAM="/${PROJECT_NAME}/${ENV_NAME}/lambda/worker/ecs_cluster_name" + ECS_SERVICE_PARAM="/${PROJECT_NAME}/${ENV_NAME}/lambda/worker/ecs_service_name" - if [ -z "$USER_API_TARGET" ] || [ "$USER_API_TARGET" = "None" ] || \ - [ -z "$ADMIN_API_TARGET" ] || [ "$ADMIN_API_TARGET" = "None" ] || \ - [ -z "$TRANSCODER_TARGET" ] || [ "$TRANSCODER_TARGET" = "None" ] || \ - [ -z "$NODE_EXPORTER_TARGET" ] || [ "$NODE_EXPORTER_TARGET" = "None" ] || \ - [ -z "$MACHINE_NODE_EXPORTER_TARGET" ] || [ "$MACHINE_NODE_EXPORTER_TARGET" = "None" ]; then - echo "One or more scrape targets are empty. Check SSM parameter values." >&2 + ECS_CLUSTER_NAME=$(aws ssm get-parameter --region "$AWS_REGION" --name "$ECS_CLUSTER_PARAM" --with-decryption --query 'Parameter.Value' --output text) + ECS_SERVICE_NAME=$(aws ssm get-parameter --region "$AWS_REGION" --name "$ECS_SERVICE_PARAM" --with-decryption --query 'Parameter.Value' --output text) + + if [ -z "$ECS_CLUSTER_NAME" ] || [ "$ECS_CLUSTER_NAME" = "None" ] || [ -z "$ECS_SERVICE_NAME" ] || [ "$ECS_SERVICE_NAME" = "None" ]; then + echo "Failed to resolve ECS cluster/service from SSM." >&2 exit 1 fi - normalize_targets() { - # Accept comma/newline/space separated values and render a YAML inline list payload: - # "host1:port","host2:port" - printf '%s' "$1" \ - | tr ',\r\n\t' ' ' \ - | xargs -n1 \ - | sed '/^$/d' \ - | sort -u \ - | awk '{printf "\"%s\",", $0}' \ - | sed 's/,$//' - } + TASK_ARNS=$(aws ecs list-tasks --region "$AWS_REGION" --cluster "$ECS_CLUSTER_NAME" --service-name "$ECS_SERVICE_NAME" --desired-status RUNNING --query 'taskArns' --output text || true) + TRANSCODER_TARGETS_YAML="" - TRANSCODER_TARGETS_YAML=$(normalize_targets "$TRANSCODER_TARGET") - NODE_EXPORTER_TARGETS_YAML=$(normalize_targets "$NODE_EXPORTER_TARGET") + if [ -n "${TASK_ARNS:-}" ]; then + ENI_IDS=$(aws ecs describe-tasks --region "$AWS_REGION" --cluster "$ECS_CLUSTER_NAME" --tasks $TASK_ARNS \ + --query 'tasks[].attachments[].details[?name==`networkInterfaceId`].value' --output text || true) - if [ -z "$TRANSCODER_TARGETS_YAML" ] || [ -z "$NODE_EXPORTER_TARGETS_YAML" ]; then - echo "Rendered target list is empty. Check SSM parameter values." >&2 - exit 1 + if [ -n "${ENI_IDS:-}" ]; then + TRANSCODER_IPS=$(aws ec2 describe-network-interfaces --region "$AWS_REGION" --network-interface-ids $ENI_IDS \ + --query 'NetworkInterfaces[].PrivateIpAddress' --output text || true) + if [ -n "${TRANSCODER_IPS:-}" ]; then + TRANSCODER_TARGETS_YAML=$(printf '%s\n' $TRANSCODER_IPS | sed '/^$/d' | sort -u | awk '{printf "\"%s:8080\",", $0}' | sed 's/,$//') + fi + fi fi echo "USER_API_TARGET=$USER_API_TARGET" >> "$GITHUB_ENV" @@ -154,26 +123,22 @@ jobs: -e "s|__MACHINE_NODE_EXPORTER_TARGET__|${MACHINE_NODE_EXPORTER_TARGET}|g" \ apps/monitoring/prometheus/prometheus.prod.yml.tpl > apps/monitoring/prometheus/prometheus.prod.yml - - name: Resolve Grafana admin password + - name: Resolve Grafana admin password from Secrets Manager env: - GRAFANA_ADMIN_PASSWORD_INPUT: ${{ github.event.inputs.grafana_admin_password }} - GRAFANA_PASSWORD_SSM_PARAM: ${{ github.event.inputs.grafana_password_ssm_param }} + GRAFANA_ADMIN_SECRET_ID: ${{ github.event.inputs.grafana_admin_secret_id }} run: | set -euo pipefail - if [ -n "$GRAFANA_ADMIN_PASSWORD_INPUT" ]; then - GRAFANA_PASSWORD="$GRAFANA_ADMIN_PASSWORD_INPUT" - else - GRAFANA_PASSWORD=$(aws ssm get-parameter \ - --region "$AWS_REGION" \ - --name "$GRAFANA_PASSWORD_SSM_PARAM" \ - --with-decryption \ - --query 'Parameter.Value' \ - --output text) - fi + SECRET_JSON=$(aws secretsmanager get-secret-value \ + --region "$AWS_REGION" \ + --secret-id "$GRAFANA_ADMIN_SECRET_ID" \ + --query 'SecretString' \ + --output text) + + GRAFANA_PASSWORD=$(echo "$SECRET_JSON" | jq -r '.password') - if [ -z "$GRAFANA_PASSWORD" ] || [ "$GRAFANA_PASSWORD" = "None" ]; then - echo "Grafana admin password is empty. Check input or SSM parameter." >&2 + if [ -z "$GRAFANA_PASSWORD" ] || [ "$GRAFANA_PASSWORD" = "null" ]; then + echo "Grafana admin password is empty in secret: $GRAFANA_ADMIN_SECRET_ID" >&2 exit 1 fi @@ -265,5 +230,3 @@ jobs: echo "Monitoring deployment timed out" >&2 exit 1 - - From 23aed85f04622f919e8d7055c8b90d40e01e02db Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 13:39:43 +0900 Subject: [PATCH 04/10] =?UTF-8?q?[OT-14]=20[FEAT]:=20rabbitmq=20=EC=82=AC?= =?UTF-8?q?=EC=9A=A9=EC=9E=90=20=EC=84=A4=EC=A0=95=EC=9D=84=20Secrets=20Ma?= =?UTF-8?q?nager=20=EB=8F=99=EA=B8=B0=ED=99=94=20=EB=B0=A9=EC=8B=9D?= =?UTF-8?q?=EC=9C=BC=EB=A1=9C=20=EC=A0=84=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-rabbitmq.yml | 85 ++++++++++++++------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/.github/workflows/deploy-rabbitmq.yml b/.github/workflows/deploy-rabbitmq.yml index f7bf7c47..7b2282aa 100644 --- a/.github/workflows/deploy-rabbitmq.yml +++ b/.github/workflows/deploy-rabbitmq.yml @@ -6,34 +6,30 @@ on: rabbitmq_instance_tag: description: "EC2 Name tag for RabbitMQ server" required: true - default: "oplust-rabbitmq-ec2" + default: "rabbitmq" type: string - rabbitmq_image_tag: - description: "RabbitMQ image tag" + rabbitmq_admin_secret_id: + description: "Secrets Manager secret id/arn for RabbitMQ admin credentials" required: true - default: "3.13-management" + default: "oplust/dev/rabbitmq/admin-credentials" type: string - rabbitmq_user_ssm_param: - description: "SSM SecureString parameter for RabbitMQ default user" + rabbitmq_app_secret_id: + description: "Secrets Manager secret id/arn for RabbitMQ app credentials" required: true - default: "/oplust/rabbitmq/default-user" + default: "oplust/dev/rabbitmq/credentials" type: string - rabbitmq_password_ssm_param: - description: "SSM SecureString parameter for RabbitMQ default password" + rabbitmq_vhost: + description: "RabbitMQ vhost" required: true - default: "/oplust/rabbitmq/default-password" - type: string - rabbitmq_vhost_ssm_param: - description: "SSM parameter for RabbitMQ default vhost" - required: true - default: "/oplust/rabbitmq/default-vhost" + default: "/" type: string +permissions: + id-token: write + contents: read + env: AWS_REGION: ap-northeast-2 - RABBITMQ_ROOT: /opt/oplust-rabbitmq - RABBITMQ_CONTAINER_NAME: oplust-rabbitmq - RABBITMQ_DATA_VOLUME: oplust-rabbitmq-data jobs: deploy-rabbitmq: @@ -43,17 +39,15 @@ jobs: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - - name: Deploy RabbitMQ via SSM + - name: Sync RabbitMQ users from Secrets Manager via SSM env: INSTANCE_TAG: ${{ github.event.inputs.rabbitmq_instance_tag }} - RABBITMQ_IMAGE_TAG: ${{ github.event.inputs.rabbitmq_image_tag }} - RABBITMQ_USER_SSM_PARAM: ${{ github.event.inputs.rabbitmq_user_ssm_param }} - RABBITMQ_PASSWORD_SSM_PARAM: ${{ github.event.inputs.rabbitmq_password_ssm_param }} - RABBITMQ_VHOST_SSM_PARAM: ${{ github.event.inputs.rabbitmq_vhost_ssm_param }} + RABBITMQ_ADMIN_SECRET_ID: ${{ github.event.inputs.rabbitmq_admin_secret_id }} + RABBITMQ_APP_SECRET_ID: ${{ github.event.inputs.rabbitmq_app_secret_id }} + RABBITMQ_VHOST: ${{ github.event.inputs.rabbitmq_vhost }} run: | set -euo pipefail @@ -70,25 +64,32 @@ jobs: PARAMS=$(jq -nc \ --arg c1 "set -euo pipefail" \ - --arg c2 "sudo mkdir -p ${RABBITMQ_ROOT}" \ - --arg c3 "RABBITMQ_DEFAULT_USER=\$(aws ssm get-parameter --region $AWS_REGION --name '${RABBITMQ_USER_SSM_PARAM}' --with-decryption --query 'Parameter.Value' --output text)" \ - --arg c4 "RABBITMQ_DEFAULT_PASS=\$(aws ssm get-parameter --region $AWS_REGION --name '${RABBITMQ_PASSWORD_SSM_PARAM}' --with-decryption --query 'Parameter.Value' --output text)" \ - --arg c5 "RABBITMQ_DEFAULT_VHOST=\$(aws ssm get-parameter --region $AWS_REGION --name '${RABBITMQ_VHOST_SSM_PARAM}' --with-decryption --query 'Parameter.Value' --output text)" \ - --arg c6 "if [ -z \\\"\$RABBITMQ_DEFAULT_USER\\\" ] || [ \\\"\$RABBITMQ_DEFAULT_USER\\\" = \\\"None\\\" ] || [ -z \\\"\$RABBITMQ_DEFAULT_PASS\\\" ] || [ \\\"\$RABBITMQ_DEFAULT_PASS\\\" = \\\"None\\\" ] || [ -z \\\"\$RABBITMQ_DEFAULT_VHOST\\\" ] || [ \\\"\$RABBITMQ_DEFAULT_VHOST\\\" = \\\"None\\\" ]; then echo 'RabbitMQ env values are empty from SSM' >&2; exit 1; fi" \ - --arg c7 "printf '%s\n' \\\"RABBITMQ_DEFAULT_USER=\$RABBITMQ_DEFAULT_USER\\\" \\\"RABBITMQ_DEFAULT_PASS=\$RABBITMQ_DEFAULT_PASS\\\" \\\"RABBITMQ_DEFAULT_VHOST=\$RABBITMQ_DEFAULT_VHOST\\\" | sudo tee ${RABBITMQ_ROOT}/.env >/dev/null" \ - --arg c8 "sudo chmod 600 ${RABBITMQ_ROOT}/.env" \ - --arg c9 "sudo docker pull rabbitmq:${RABBITMQ_IMAGE_TAG}" \ - --arg c10 "sudo docker rm -f ${RABBITMQ_CONTAINER_NAME} || true" \ - --arg c11 "sudo docker volume create ${RABBITMQ_DATA_VOLUME} >/dev/null" \ - --arg c12 "sudo docker run -d --name ${RABBITMQ_CONTAINER_NAME} --restart unless-stopped -p 5672:5672 -p 15672:15672 --env-file ${RABBITMQ_ROOT}/.env -v ${RABBITMQ_DATA_VOLUME}:/var/lib/rabbitmq rabbitmq:${RABBITMQ_IMAGE_TAG}" \ - --arg c13 "for i in \$(seq 1 30); do if sudo docker exec ${RABBITMQ_CONTAINER_NAME} rabbitmq-diagnostics -q ping >/dev/null 2>&1; then echo 'RabbitMQ is healthy'; exit 0; fi; sleep 2; done; echo 'RabbitMQ health check failed' >&2; exit 1" \ - '{commands:[$c1,$c2,$c3,$c4,$c5,$c6,$c7,$c8,$c9,$c10,$c11,$c12,$c13]}') + --arg c2 "command -v jq >/dev/null 2>&1 || sudo dnf -y install jq" \ + --arg c3 "command -v aws >/dev/null 2>&1 || sudo dnf -y install awscli" \ + --arg c4 "sudo systemctl enable --now rabbitmq-server" \ + --arg c5 "sudo rabbitmq-plugins enable rabbitmq_management" \ + --arg c6 "ADMIN_JSON=\$(aws secretsmanager get-secret-value --region $AWS_REGION --secret-id '${RABBITMQ_ADMIN_SECRET_ID}' --query SecretString --output text)" \ + --arg c7 "APP_JSON=\$(aws secretsmanager get-secret-value --region $AWS_REGION --secret-id '${RABBITMQ_APP_SECRET_ID}' --query SecretString --output text)" \ + --arg c8 "ADMIN_USER=\$(echo \"\$ADMIN_JSON\" | jq -r '.username')" \ + --arg c9 "ADMIN_PASS=\$(echo \"\$ADMIN_JSON\" | jq -r '.password')" \ + --arg c10 "APP_USER=\$(echo \"\$APP_JSON\" | jq -r '.username')" \ + --arg c11 "APP_PASS=\$(echo \"\$APP_JSON\" | jq -r '.password')" \ + --arg c12 "if [ -z \"\$ADMIN_USER\" ] || [ \"\$ADMIN_USER\" = \"null\" ] || [ -z \"\$ADMIN_PASS\" ] || [ \"\$ADMIN_PASS\" = \"null\" ] || [ -z \"\$APP_USER\" ] || [ \"\$APP_USER\" = \"null\" ] || [ -z \"\$APP_PASS\" ] || [ \"\$APP_PASS\" = \"null\" ]; then echo 'Secret payload invalid (username/password missing)' >&2; exit 1; fi" \ + --arg c13 "if sudo rabbitmqctl list_users | awk '{print \$1}' | grep -qx \"\$ADMIN_USER\"; then sudo rabbitmqctl change_password \"\$ADMIN_USER\" \"\$ADMIN_PASS\"; else sudo rabbitmqctl add_user \"\$ADMIN_USER\" \"\$ADMIN_PASS\"; fi" \ + --arg c14 "sudo rabbitmqctl set_user_tags \"\$ADMIN_USER\" administrator" \ + --arg c15 "sudo rabbitmqctl set_permissions -p '${RABBITMQ_VHOST}' \"\$ADMIN_USER\" '.*' '.*' '.*'" \ + --arg c16 "if sudo rabbitmqctl list_users | awk '{print \$1}' | grep -qx \"\$APP_USER\"; then sudo rabbitmqctl change_password \"\$APP_USER\" \"\$APP_PASS\"; else sudo rabbitmqctl add_user \"\$APP_USER\" \"\$APP_PASS\"; fi" \ + --arg c17 "sudo rabbitmqctl set_user_tags \"\$APP_USER\" management" \ + --arg c18 "sudo rabbitmqctl set_permissions -p '${RABBITMQ_VHOST}' \"\$APP_USER\" '.*' '.*' '.*'" \ + --arg c19 "if sudo rabbitmqctl list_users | awk '{print \$1}' | grep -qx guest; then sudo rabbitmqctl delete_user guest || true; fi" \ + --arg c20 "sudo rabbitmq-diagnostics -q ping" \ + '{commands:[$c1,$c2,$c3,$c4,$c5,$c6,$c7,$c8,$c9,$c10,$c11,$c12,$c13,$c14,$c15,$c16,$c17,$c18,$c19,$c20]}') CMD_ID=$(aws ssm send-command \ --region "$AWS_REGION" \ --instance-ids "$INSTANCE_ID" \ --document-name "AWS-RunShellScript" \ - --comment "Deploy RabbitMQ" \ + --comment "Sync RabbitMQ users from Secrets Manager" \ --parameters "$PARAMS" \ --query 'Command.CommandId' \ --output text) @@ -105,11 +106,11 @@ jobs: case "$STATUS" in Success) - echo "RabbitMQ deployment success" + echo "RabbitMQ sync success" exit 0 ;; Failed|Cancelled|TimedOut) - echo "RabbitMQ deployment failed: $STATUS" >&2 + echo "RabbitMQ sync failed: $STATUS" >&2 aws ssm get-command-invocation \ --region "$AWS_REGION" \ --command-id "$CMD_ID" \ @@ -127,5 +128,5 @@ jobs: esac done - echo "RabbitMQ deployment timed out" >&2 + echo "RabbitMQ sync timed out" >&2 exit 1 From 7061e4ccd41b2f4b6dc97b6fd180e2e2af2f222c Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 15:36:23 +0900 Subject: [PATCH 05/10] =?UTF-8?q?[OT-14]=20[CHORE]:=20dev=20=EC=9D=B8?= =?UTF-8?q?=ED=94=84=EB=9D=BC=20=EA=B8=B0=EC=A4=80=20=EB=B0=B0=ED=8F=AC=20?= =?UTF-8?q?=EC=9B=8C=ED=81=AC=ED=94=8C=EB=A1=9C=EC=9A=B0=20=EC=A0=95?= =?UTF-8?q?=ED=95=A9=EC=84=B1=20=EB=B0=8F=20=EB=B3=B4=EC=95=88=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-ai.yml | 6 +++--- .github/workflows/deploy-ec2-docker.yml | 27 ++++++++++++------------- .github/workflows/deploy-monitoring.yml | 6 +++--- .github/workflows/deploy-rabbitmq.yml | 5 +++++ 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.github/workflows/deploy-ai.yml b/.github/workflows/deploy-ai.yml index 05e0658e..1aa17c65 100644 --- a/.github/workflows/deploy-ai.yml +++ b/.github/workflows/deploy-ai.yml @@ -98,12 +98,12 @@ jobs: MONITORING_PRIVATE_IP=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=${PROJECT_NAME}-monitoring-ec2" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].PrivateIpAddress" \ --output text) if [ -z "$MONITORING_PRIVATE_IP" ] || [ "$MONITORING_PRIVATE_IP" = "None" ]; then - echo "No running monitoring instance found for tag: ${PROJECT_NAME}-monitoring-ec2" >&2 + echo "No running monitoring instance found for tag: monitoring" >&2 exit 1 fi @@ -126,7 +126,7 @@ jobs: MONITORING_SG_ID=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=${PROJECT_NAME}-monitoring-ec2" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].SecurityGroups[0].GroupId" \ --output text) diff --git a/.github/workflows/deploy-ec2-docker.yml b/.github/workflows/deploy-ec2-docker.yml index 2b07d2e6..33b84cb1 100644 --- a/.github/workflows/deploy-ec2-docker.yml +++ b/.github/workflows/deploy-ec2-docker.yml @@ -89,6 +89,7 @@ jobs: PROJECT_NAME: oplust DB_NAME: oplust RDS_INSTANCE_IDENTIFIER: oplust-dev-db + MONITORING_INSTANCE_TAG: monitoring NODE_EXPORTER_TARGET_SSM_PARAM: /oplust/monitoring/targets/node-exporter SSM_ECS_CLUSTER_NAME_PARAM: /oplust/dev/lambda/worker/ecs_cluster_name SSM_ECS_SERVICE_NAME_PARAM: /oplust/dev/lambda/worker/ecs_service_name @@ -110,27 +111,19 @@ jobs: fi DB_HOST=$(aws rds describe-db-instances --region "$AWS_REGION" --db-instance-identifier "$RDS_INSTANCE_IDENTIFIER" --query 'DBInstances[0].Endpoint.Address' --output text) - DB_SECRET_ARN=$(aws rds describe-db-instances --region "$AWS_REGION" --db-instance-identifier "$RDS_INSTANCE_IDENTIFIER" --query 'DBInstances[0].MasterUserSecret.SecretArn' --output text) - DB_SECRET_JSON=$(aws secretsmanager get-secret-value --region "$AWS_REGION" --secret-id "$DB_SECRET_ARN" --query 'SecretString' --output text) - DB_USER=$(echo "$DB_SECRET_JSON" | jq -r '.username') - DB_PASS=$(echo "$DB_SECRET_JSON" | jq -r '.password') - - if [ -z "$DB_HOST" ] || [ "$DB_HOST" = "None" ] || [ -z "$DB_USER" ] || [ "$DB_USER" = "null" ] || [ -z "$DB_PASS" ] || [ "$DB_PASS" = "null" ]; then - echo "Failed to resolve DB connection values from RDS/SecretsManager" >&2 + if [ -z "$DB_HOST" ] || [ "$DB_HOST" = "None" ]; then + echo "Failed to resolve DB host from RDS" >&2 exit 1 fi - DB_USER_B64=$(printf '%s' "$DB_USER" | base64 | tr -d '\n') - DB_PASS_B64=$(printf '%s' "$DB_PASS" | base64 | tr -d '\n') - MONITORING_PRIVATE_IP=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=${MONITORING_INSTANCE_TAG}" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].PrivateIpAddress" \ --output text) if [ -z "$MONITORING_PRIVATE_IP" ] || [ "$MONITORING_PRIVATE_IP" = "None" ]; then - echo "No running monitoring instance found for tag: monitoring" >&2 + echo "No running monitoring instance found for tag: ${MONITORING_INSTANCE_TAG}" >&2 exit 1 fi @@ -212,9 +205,15 @@ jobs: "set -e" "sudo mkdir -p /etc/oplust" "SERVICE_ENV=\$(aws ssm get-parameter --region $AWS_REGION --name '${service_env_param}' --with-decryption --query 'Parameter.Value' --output text)" + "if ! command -v jq >/dev/null 2>&1; then if command -v dnf >/dev/null 2>&1; then sudo dnf -y install jq; elif command -v yum >/dev/null 2>&1; then sudo yum -y install jq; elif command -v apt-get >/dev/null 2>&1; then sudo apt-get update -y && sudo apt-get install -y jq; else echo 'jq not found and no supported package manager to install it' >&2; exit 1; fi; fi" + "DB_SECRET_ARN=\$(aws rds describe-db-instances --region $AWS_REGION --db-instance-identifier '$RDS_INSTANCE_IDENTIFIER' --query 'DBInstances[0].MasterUserSecret.SecretArn' --output text)" + "DB_SECRET_JSON=\$(aws secretsmanager get-secret-value --region $AWS_REGION --secret-id \"\$DB_SECRET_ARN\" --query 'SecretString' --output text)" + "DB_USER=\$(echo \"\$DB_SECRET_JSON\" | jq -r '.username')" + "DB_PASS=\$(echo \"\$DB_SECRET_JSON\" | jq -r '.password')" + "if [ -z \"\$DB_USER\" ] || [ \"\$DB_USER\" = \"null\" ] || [ -z \"\$DB_PASS\" ] || [ \"\$DB_PASS\" = \"null\" ]; then echo 'Failed to resolve DB credentials from Secrets Manager' >&2; exit 1; fi" "echo \"SPRING_DATASOURCE_URL=jdbc:mysql://${DB_HOST}:3306/${DB_NAME}\" | sudo tee ${env_file} >/dev/null" - "echo \"SPRING_DATASOURCE_USERNAME=\$(echo '${DB_USER_B64}' | base64 -d)\" | sudo tee -a ${env_file} >/dev/null" - "echo \"SPRING_DATASOURCE_PASSWORD=\$(echo '${DB_PASS_B64}' | base64 -d)\" | sudo tee -a ${env_file} >/dev/null" + "echo \"SPRING_DATASOURCE_USERNAME=\$DB_USER\" | sudo tee -a ${env_file} >/dev/null" + "echo \"SPRING_DATASOURCE_PASSWORD=\$DB_PASS\" | sudo tee -a ${env_file} >/dev/null" "printf '%s\n' \"\$SERVICE_ENV\" | sudo tee -a ${env_file} >/dev/null" "sudo chmod 600 ${env_file}" "aws ecr get-login-password --region $AWS_REGION | sudo docker login --username AWS --password-stdin $ECR_REGISTRY" diff --git a/.github/workflows/deploy-monitoring.yml b/.github/workflows/deploy-monitoring.yml index 727bd084..d9346f3e 100644 --- a/.github/workflows/deploy-monitoring.yml +++ b/.github/workflows/deploy-monitoring.yml @@ -89,18 +89,18 @@ jobs: exit 1 fi - TASK_ARNS=$(aws ecs list-tasks --region "$AWS_REGION" --cluster "$ECS_CLUSTER_NAME" --service-name "$ECS_SERVICE_NAME" --desired-status RUNNING --query 'taskArns' --output text || true) + TASK_ARNS=$(aws ecs list-tasks --region "$AWS_REGION" --cluster "$ECS_CLUSTER_NAME" --service-name "$ECS_SERVICE_NAME" --desired-status RUNNING --query 'taskArns' --output text | tr '\t' ' ' || true) TRANSCODER_TARGETS_YAML="" if [ -n "${TASK_ARNS:-}" ]; then ENI_IDS=$(aws ecs describe-tasks --region "$AWS_REGION" --cluster "$ECS_CLUSTER_NAME" --tasks $TASK_ARNS \ - --query 'tasks[].attachments[].details[?name==`networkInterfaceId`].value' --output text || true) + --query 'tasks[].attachments[].details[?name==`networkInterfaceId`].value' --output text | tr '\t' ' ' || true) if [ -n "${ENI_IDS:-}" ]; then TRANSCODER_IPS=$(aws ec2 describe-network-interfaces --region "$AWS_REGION" --network-interface-ids $ENI_IDS \ --query 'NetworkInterfaces[].PrivateIpAddress' --output text || true) if [ -n "${TRANSCODER_IPS:-}" ]; then - TRANSCODER_TARGETS_YAML=$(printf '%s\n' $TRANSCODER_IPS | sed '/^$/d' | sort -u | awk '{printf "\"%s:8080\",", $0}' | sed 's/,$//') + TRANSCODER_TARGETS_YAML=$(printf '%s\n' $TRANSCODER_IPS | sed '/^$/d' | sort -u | awk '{printf "\"%s:8082\",", $0}' | sed 's/,$//') fi fi fi diff --git a/.github/workflows/deploy-rabbitmq.yml b/.github/workflows/deploy-rabbitmq.yml index 7b2282aa..ad0fc85f 100644 --- a/.github/workflows/deploy-rabbitmq.yml +++ b/.github/workflows/deploy-rabbitmq.yml @@ -51,6 +51,11 @@ jobs: run: | set -euo pipefail + if [[ "$RABBITMQ_VHOST" == *"'"* ]] || [[ "$RABBITMQ_VHOST" == *$'\n'* ]] || [[ "$RABBITMQ_VHOST" == *$'\r'* ]]; then + echo "Invalid rabbitmq_vhost input. Single quote and newlines are not allowed." >&2 + exit 1 + fi + INSTANCE_ID=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ --filters "Name=tag:Name,Values=${INSTANCE_TAG}" "Name=instance-state-name,Values=running" \ From 3e1213865d6085eb1deeed2afe25d30de87fd364 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 15:43:32 +0900 Subject: [PATCH 06/10] =?UTF-8?q?[OT-14]=20[FIX]:=20deploy-ai=20monitoring?= =?UTF-8?q?=20=EC=A1=B0=ED=9A=8C=EB=A5=BC=20=ED=94=84=EB=A1=9C=EC=A0=9D?= =?UTF-8?q?=ED=8A=B8/=ED=99=98=EA=B2=BD=20=ED=83=9C=EA=B7=B8=EB=A1=9C=20?= =?UTF-8?q?=ED=95=9C=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-ai.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-ai.yml b/.github/workflows/deploy-ai.yml index 1aa17c65..46404c84 100644 --- a/.github/workflows/deploy-ai.yml +++ b/.github/workflows/deploy-ai.yml @@ -84,6 +84,7 @@ jobs: ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com IMAGE_TAG_INPUT: ${{ github.event.inputs.image_tag }} PROJECT_NAME: oplust + ENV_NAME: dev SSM_MACHINE_ENV_PARAM: /oplust/machine/env SSM_MACHINE_NODE_EXPORTER_TARGET_PARAM: /oplust/monitoring/targets/machine-node-exporter run: | @@ -98,12 +99,12 @@ jobs: MONITORING_PRIVATE_IP=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=monitoring" "Name=tag:Project,Values=${PROJECT_NAME}" "Name=tag:Env,Values=${ENV_NAME}" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].PrivateIpAddress" \ --output text) if [ -z "$MONITORING_PRIVATE_IP" ] || [ "$MONITORING_PRIVATE_IP" = "None" ]; then - echo "No running monitoring instance found for tag: monitoring" >&2 + echo "No running monitoring instance found for Name=monitoring, Project=${PROJECT_NAME}, Env=${ENV_NAME}" >&2 exit 1 fi @@ -126,7 +127,7 @@ jobs: MONITORING_SG_ID=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ - --filters "Name=tag:Name,Values=monitoring" "Name=instance-state-name,Values=running" \ + --filters "Name=tag:Name,Values=monitoring" "Name=tag:Project,Values=${PROJECT_NAME}" "Name=tag:Env,Values=${ENV_NAME}" "Name=instance-state-name,Values=running" \ --query "Reservations[0].Instances[0].SecurityGroups[0].GroupId" \ --output text) From c456878ddd915171a5511b4d28aeb22aeeee69a0 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 16:07:40 +0900 Subject: [PATCH 07/10] [OT-14] fix(ci): harden SSM deploys and align workflows with terraform infra --- .github/workflows/deploy-ai.yml | 20 ++++++++++++++++++++ .github/workflows/deploy-ec2-docker.yml | 15 +++++++++++++++ .github/workflows/deploy-monitoring.yml | 20 ++++++++++++++++++++ .github/workflows/deploy-rabbitmq.yml | 20 ++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/.github/workflows/deploy-ai.yml b/.github/workflows/deploy-ai.yml index 46404c84..e68b46cd 100644 --- a/.github/workflows/deploy-ai.yml +++ b/.github/workflows/deploy-ai.yml @@ -119,6 +119,26 @@ jobs: exit 1 fi + wait_for_ssm_online() { + local iid="$1" + for _ in $(seq 1 30); do + ping_status=$(aws ssm describe-instance-information \ + --region "$AWS_REGION" \ + --filters "Key=InstanceIds,Values=${iid}" \ + --query 'InstanceInformationList[0].PingStatus' \ + --output text 2>/dev/null || true) + if [ "$ping_status" = "Online" ]; then + return 0 + fi + sleep 5 + done + return 1 + } + + if ! wait_for_ssm_online "$INSTANCE_ID"; then + echo "Instance is running but not SSM Online: $INSTANCE_ID" >&2 + exit 1 + fi MACHINE_PRIVATE_IP=$(aws ec2 describe-instances \ --region "$AWS_REGION" \ --instance-ids "$INSTANCE_ID" \ diff --git a/.github/workflows/deploy-ec2-docker.yml b/.github/workflows/deploy-ec2-docker.yml index 33b84cb1..eb935a40 100644 --- a/.github/workflows/deploy-ec2-docker.yml +++ b/.github/workflows/deploy-ec2-docker.yml @@ -127,6 +127,21 @@ jobs: exit 1 fi + wait_for_ssm_online() { + local iid="$1" + for _ in $(seq 1 30); do + ping_status=$(aws ssm describe-instance-information \ + --region "$AWS_REGION" \ + --filters "Key=InstanceIds,Values=${iid}" \ + --query 'InstanceInformationList[0].PingStatus' \ + --output text 2>/dev/null || true) + if [ "$ping_status" = "Online" ]; then + return 0 + fi + sleep 5 + done + return 1 + } deploy_service() { local target_tag="$1" local image_uri="$2" diff --git a/.github/workflows/deploy-monitoring.yml b/.github/workflows/deploy-monitoring.yml index d9346f3e..9103d021 100644 --- a/.github/workflows/deploy-monitoring.yml +++ b/.github/workflows/deploy-monitoring.yml @@ -161,6 +161,26 @@ jobs: exit 1 fi + wait_for_ssm_online() { + local iid="$1" + for _ in $(seq 1 30); do + ping_status=$(aws ssm describe-instance-information \ + --region "$AWS_REGION" \ + --filters "Key=InstanceIds,Values=${iid}" \ + --query 'InstanceInformationList[0].PingStatus' \ + --output text 2>/dev/null || true) + if [ "$ping_status" = "Online" ]; then + return 0 + fi + sleep 5 + done + return 1 + } + + if ! wait_for_ssm_online "$INSTANCE_ID"; then + echo "Instance is running but not SSM Online: $INSTANCE_ID" >&2 + exit 1 + fi COMPOSE_B64=$(base64 -w 0 apps/monitoring/docker-compose.yml) COMPOSE_PROD_B64=$(base64 -w 0 apps/monitoring/docker-compose.prod.yml) PROM_PROD_B64=$(base64 -w 0 apps/monitoring/prometheus/prometheus.prod.yml) diff --git a/.github/workflows/deploy-rabbitmq.yml b/.github/workflows/deploy-rabbitmq.yml index ad0fc85f..552db05d 100644 --- a/.github/workflows/deploy-rabbitmq.yml +++ b/.github/workflows/deploy-rabbitmq.yml @@ -67,6 +67,26 @@ jobs: exit 1 fi + wait_for_ssm_online() { + local iid="$1" + for _ in $(seq 1 30); do + ping_status=$(aws ssm describe-instance-information \ + --region "$AWS_REGION" \ + --filters "Key=InstanceIds,Values=${iid}" \ + --query 'InstanceInformationList[0].PingStatus' \ + --output text 2>/dev/null || true) + if [ "$ping_status" = "Online" ]; then + return 0 + fi + sleep 5 + done + return 1 + } + + if ! wait_for_ssm_online "$INSTANCE_ID"; then + echo "Instance is running but not SSM Online: $INSTANCE_ID" >&2 + exit 1 + fi PARAMS=$(jq -nc \ --arg c1 "set -euo pipefail" \ --arg c2 "command -v jq >/dev/null 2>&1 || sudo dnf -y install jq" \ From b25458043ffc16ed27749f287b7125ae1c48c452 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 16:38:26 +0900 Subject: [PATCH 08/10] [OT-14] fix(ci): align ai env parameter path with dev scoped ssm --- .github/workflows/deploy-ai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-ai.yml b/.github/workflows/deploy-ai.yml index e68b46cd..6a988ed4 100644 --- a/.github/workflows/deploy-ai.yml +++ b/.github/workflows/deploy-ai.yml @@ -85,7 +85,7 @@ jobs: IMAGE_TAG_INPUT: ${{ github.event.inputs.image_tag }} PROJECT_NAME: oplust ENV_NAME: dev - SSM_MACHINE_ENV_PARAM: /oplust/machine/env + SSM_MACHINE_ENV_PARAM: /oplust/dev/machine/env SSM_MACHINE_NODE_EXPORTER_TARGET_PARAM: /oplust/monitoring/targets/machine-node-exporter run: | set -euo pipefail From 4abe536ddf1aaf5abb89c7f82b330269b65fcb03 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 17:19:17 +0900 Subject: [PATCH 09/10] =?UTF-8?q?[OT-14]=20[CHORE]:=20deploy-ec2-docker=20?= =?UTF-8?q?OIDC=20=EB=94=94=EB=B2=84=EA=B7=B8=20=EA=B2=80=EC=A6=9D=20?= =?UTF-8?q?=EC=8A=A4=ED=85=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy-ec2-docker.yml | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/deploy-ec2-docker.yml b/.github/workflows/deploy-ec2-docker.yml index eb935a40..530ab743 100644 --- a/.github/workflows/deploy-ec2-docker.yml +++ b/.github/workflows/deploy-ec2-docker.yml @@ -37,12 +37,32 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Validate OIDC inputs + env: + ROLE_ARN: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} + run: | + set -euo pipefail + if [ -z "${ROLE_ARN:-}" ]; then + echo "AWS_GITHUB_ACTIONS_ROLE_ARN is empty or not injected." >&2 + exit 1 + fi + if ! echo "$ROLE_ARN" | grep -Eq '^arn:aws:iam::[0-9]{12}:role/.+'; then + echo "AWS_GITHUB_ACTIONS_ROLE_ARN format is invalid: expected role ARN." >&2 + exit 1 + fi + echo "GitHub ref: ${GITHUB_REF}" + echo "GitHub ref_name: ${GITHUB_REF_NAME}" + echo "Role ARN format check passed." + - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} + - name: Verify assumed identity + run: aws sts get-caller-identity + - name: Login to ECR uses: aws-actions/amazon-ecr-login@v2 @@ -76,12 +96,32 @@ jobs: needs: build-and-push steps: + - name: Validate OIDC inputs + env: + ROLE_ARN: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} + run: | + set -euo pipefail + if [ -z "${ROLE_ARN:-}" ]; then + echo "AWS_GITHUB_ACTIONS_ROLE_ARN is empty or not injected." >&2 + exit 1 + fi + if ! echo "$ROLE_ARN" | grep -Eq '^arn:aws:iam::[0-9]{12}:role/.+'; then + echo "AWS_GITHUB_ACTIONS_ROLE_ARN format is invalid: expected role ARN." >&2 + exit 1 + fi + echo "GitHub ref: ${GITHUB_REF}" + echo "GitHub ref_name: ${GITHUB_REF_NAME}" + echo "Role ARN format check passed." + - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.AWS_GITHUB_ACTIONS_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} + - name: Verify assumed identity + run: aws sts get-caller-identity + - name: Deploy to EC2 instances via SSM env: ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com From d6cd836a2e3220bf1dc1547a1bbb38cb66048786 Mon Sep 17 00:00:00 2001 From: arlen02-01 Date: Fri, 15 May 2026 18:15:47 +0900 Subject: [PATCH 10/10] [OT-14] ci: wait for ec2 running and ssm online before send-command --- .github/workflows/deploy-ec2-docker.yml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-ec2-docker.yml b/.github/workflows/deploy-ec2-docker.yml index 530ab743..0c14fc86 100644 --- a/.github/workflows/deploy-ec2-docker.yml +++ b/.github/workflows/deploy-ec2-docker.yml @@ -1,4 +1,4 @@ -name: Deploy Docker Apps To EC2 +name: Deploy Docker Apps To EC2 on: workflow_dispatch: @@ -202,6 +202,26 @@ jobs: echo "No running instance found for tag: ${target_tag}" >&2 exit 1 fi + local instance_state + instance_state=$(aws ec2 describe-instances \ + --region "$AWS_REGION" \ + --instance-ids "$instance_id" \ + --query "Reservations[0].Instances[0].State.Name" \ + --output text) + + if [ "$instance_state" != "running" ]; then + echo "Instance is not running for tag ${target_tag}: ${instance_id} (${instance_state})" >&2 + exit 1 + fi + + if ! wait_for_ssm_online "$instance_id"; then + echo "Instance is not SSM online for tag ${target_tag}: ${instance_id}" >&2 + aws ssm describe-instance-information \ + --region "$AWS_REGION" \ + --filters "Key=InstanceIds,Values=${instance_id}" \ + --output table || true + exit 1 + fi local private_ip private_ip=$(aws ec2 describe-instances \