From e3c014e7f79bcf2d335d7e855f1851c037f84f45 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 6 Apr 2026 12:35:02 -0400 Subject: [PATCH 1/3] metrcs cut --- .../git-cache/central-configmap.yaml | 0 .../git-cache/central-headless-service.yaml | 0 .../git-cache/central-metrics-service.yaml | 0 .../{ => base}/git-cache/central-pdb.yaml.tpl | 0 .../{ => base}/git-cache/central-service.yaml | 0 .../git-cache/central-statefulset.yaml.tpl | 0 .../git-cache/daemonset-configmap.yaml | 0 .../{ => base}/git-cache/daemonset.yaml | 0 .../kubernetes/{ => base}/git-cache/deploy.sh | 0 .../{ => base}/git-cache/kustomization.yaml | 0 .../kubernetes/{ => base}/git-cache/rbac.yaml | 0 .../git-cache/scripts/python/.coverage | Bin .../git-cache/scripts/python/central_lib.py | 0 .../git-cache/scripts/python/daemonset_lib.py | 0 .../scripts/python/test_central_lib.py | 0 .../scripts/python/test_daemonset_lib.py | 0 .../git-cache/tests/smoke/conftest.py | 0 .../git-cache/tests/smoke/test_git_cache.py | 0 .../{ => base}/harbor-namespace.yaml | 0 osdc/base/kubernetes/base/kustomization.yaml | 10 +++ .../{ => base}/nvidia-device-plugin.yaml | 0 .../{ => base}/registry-mirror-config.yaml | 0 osdc/base/kubernetes/kustomization.yaml | 16 ++--- .../overlays/eks/kustomization.yaml | 8 +++ .../eks}/node-performance-tuning.yaml | 0 .../{ => overlays/eks}/storageclass-gp3.yaml | 0 .../overlays/gke/kustomization.yaml | 8 +++ osdc/justfile | 63 +++++++++++++----- osdc/scripts/bootstrap-state-gcp.sh | 47 +++++++++++++ osdc/scripts/bootstrap-state.sh | 9 +++ 30 files changed, 134 insertions(+), 27 deletions(-) rename osdc/base/kubernetes/{ => base}/git-cache/central-configmap.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/central-headless-service.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/central-metrics-service.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/central-pdb.yaml.tpl (100%) rename osdc/base/kubernetes/{ => base}/git-cache/central-service.yaml 
(100%) rename osdc/base/kubernetes/{ => base}/git-cache/central-statefulset.yaml.tpl (100%) rename osdc/base/kubernetes/{ => base}/git-cache/daemonset-configmap.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/daemonset.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/deploy.sh (100%) rename osdc/base/kubernetes/{ => base}/git-cache/kustomization.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/rbac.yaml (100%) rename osdc/base/kubernetes/{ => base}/git-cache/scripts/python/.coverage (100%) rename osdc/base/kubernetes/{ => base}/git-cache/scripts/python/central_lib.py (100%) rename osdc/base/kubernetes/{ => base}/git-cache/scripts/python/daemonset_lib.py (100%) rename osdc/base/kubernetes/{ => base}/git-cache/scripts/python/test_central_lib.py (100%) rename osdc/base/kubernetes/{ => base}/git-cache/scripts/python/test_daemonset_lib.py (100%) rename osdc/base/kubernetes/{ => base}/git-cache/tests/smoke/conftest.py (100%) rename osdc/base/kubernetes/{ => base}/git-cache/tests/smoke/test_git_cache.py (100%) rename osdc/base/kubernetes/{ => base}/harbor-namespace.yaml (100%) create mode 100644 osdc/base/kubernetes/base/kustomization.yaml rename osdc/base/kubernetes/{ => base}/nvidia-device-plugin.yaml (100%) rename osdc/base/kubernetes/{ => base}/registry-mirror-config.yaml (100%) create mode 100644 osdc/base/kubernetes/overlays/eks/kustomization.yaml rename osdc/base/kubernetes/{ => overlays/eks}/node-performance-tuning.yaml (100%) rename osdc/base/kubernetes/{ => overlays/eks}/storageclass-gp3.yaml (100%) create mode 100644 osdc/base/kubernetes/overlays/gke/kustomization.yaml create mode 100755 osdc/scripts/bootstrap-state-gcp.sh diff --git a/osdc/base/kubernetes/git-cache/central-configmap.yaml b/osdc/base/kubernetes/base/git-cache/central-configmap.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/central-configmap.yaml rename to osdc/base/kubernetes/base/git-cache/central-configmap.yaml diff --git 
a/osdc/base/kubernetes/git-cache/central-headless-service.yaml b/osdc/base/kubernetes/base/git-cache/central-headless-service.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/central-headless-service.yaml rename to osdc/base/kubernetes/base/git-cache/central-headless-service.yaml diff --git a/osdc/base/kubernetes/git-cache/central-metrics-service.yaml b/osdc/base/kubernetes/base/git-cache/central-metrics-service.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/central-metrics-service.yaml rename to osdc/base/kubernetes/base/git-cache/central-metrics-service.yaml diff --git a/osdc/base/kubernetes/git-cache/central-pdb.yaml.tpl b/osdc/base/kubernetes/base/git-cache/central-pdb.yaml.tpl similarity index 100% rename from osdc/base/kubernetes/git-cache/central-pdb.yaml.tpl rename to osdc/base/kubernetes/base/git-cache/central-pdb.yaml.tpl diff --git a/osdc/base/kubernetes/git-cache/central-service.yaml b/osdc/base/kubernetes/base/git-cache/central-service.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/central-service.yaml rename to osdc/base/kubernetes/base/git-cache/central-service.yaml diff --git a/osdc/base/kubernetes/git-cache/central-statefulset.yaml.tpl b/osdc/base/kubernetes/base/git-cache/central-statefulset.yaml.tpl similarity index 100% rename from osdc/base/kubernetes/git-cache/central-statefulset.yaml.tpl rename to osdc/base/kubernetes/base/git-cache/central-statefulset.yaml.tpl diff --git a/osdc/base/kubernetes/git-cache/daemonset-configmap.yaml b/osdc/base/kubernetes/base/git-cache/daemonset-configmap.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/daemonset-configmap.yaml rename to osdc/base/kubernetes/base/git-cache/daemonset-configmap.yaml diff --git a/osdc/base/kubernetes/git-cache/daemonset.yaml b/osdc/base/kubernetes/base/git-cache/daemonset.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/daemonset.yaml rename to 
osdc/base/kubernetes/base/git-cache/daemonset.yaml diff --git a/osdc/base/kubernetes/git-cache/deploy.sh b/osdc/base/kubernetes/base/git-cache/deploy.sh similarity index 100% rename from osdc/base/kubernetes/git-cache/deploy.sh rename to osdc/base/kubernetes/base/git-cache/deploy.sh diff --git a/osdc/base/kubernetes/git-cache/kustomization.yaml b/osdc/base/kubernetes/base/git-cache/kustomization.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/kustomization.yaml rename to osdc/base/kubernetes/base/git-cache/kustomization.yaml diff --git a/osdc/base/kubernetes/git-cache/rbac.yaml b/osdc/base/kubernetes/base/git-cache/rbac.yaml similarity index 100% rename from osdc/base/kubernetes/git-cache/rbac.yaml rename to osdc/base/kubernetes/base/git-cache/rbac.yaml diff --git a/osdc/base/kubernetes/git-cache/scripts/python/.coverage b/osdc/base/kubernetes/base/git-cache/scripts/python/.coverage similarity index 100% rename from osdc/base/kubernetes/git-cache/scripts/python/.coverage rename to osdc/base/kubernetes/base/git-cache/scripts/python/.coverage diff --git a/osdc/base/kubernetes/git-cache/scripts/python/central_lib.py b/osdc/base/kubernetes/base/git-cache/scripts/python/central_lib.py similarity index 100% rename from osdc/base/kubernetes/git-cache/scripts/python/central_lib.py rename to osdc/base/kubernetes/base/git-cache/scripts/python/central_lib.py diff --git a/osdc/base/kubernetes/git-cache/scripts/python/daemonset_lib.py b/osdc/base/kubernetes/base/git-cache/scripts/python/daemonset_lib.py similarity index 100% rename from osdc/base/kubernetes/git-cache/scripts/python/daemonset_lib.py rename to osdc/base/kubernetes/base/git-cache/scripts/python/daemonset_lib.py diff --git a/osdc/base/kubernetes/git-cache/scripts/python/test_central_lib.py b/osdc/base/kubernetes/base/git-cache/scripts/python/test_central_lib.py similarity index 100% rename from osdc/base/kubernetes/git-cache/scripts/python/test_central_lib.py rename to 
osdc/base/kubernetes/base/git-cache/scripts/python/test_central_lib.py diff --git a/osdc/base/kubernetes/git-cache/scripts/python/test_daemonset_lib.py b/osdc/base/kubernetes/base/git-cache/scripts/python/test_daemonset_lib.py similarity index 100% rename from osdc/base/kubernetes/git-cache/scripts/python/test_daemonset_lib.py rename to osdc/base/kubernetes/base/git-cache/scripts/python/test_daemonset_lib.py diff --git a/osdc/base/kubernetes/git-cache/tests/smoke/conftest.py b/osdc/base/kubernetes/base/git-cache/tests/smoke/conftest.py similarity index 100% rename from osdc/base/kubernetes/git-cache/tests/smoke/conftest.py rename to osdc/base/kubernetes/base/git-cache/tests/smoke/conftest.py diff --git a/osdc/base/kubernetes/git-cache/tests/smoke/test_git_cache.py b/osdc/base/kubernetes/base/git-cache/tests/smoke/test_git_cache.py similarity index 100% rename from osdc/base/kubernetes/git-cache/tests/smoke/test_git_cache.py rename to osdc/base/kubernetes/base/git-cache/tests/smoke/test_git_cache.py diff --git a/osdc/base/kubernetes/harbor-namespace.yaml b/osdc/base/kubernetes/base/harbor-namespace.yaml similarity index 100% rename from osdc/base/kubernetes/harbor-namespace.yaml rename to osdc/base/kubernetes/base/harbor-namespace.yaml diff --git a/osdc/base/kubernetes/base/kustomization.yaml b/osdc/base/kubernetes/base/kustomization.yaml new file mode 100644 index 00000000..0ae31465 --- /dev/null +++ b/osdc/base/kubernetes/base/kustomization.yaml @@ -0,0 +1,10 @@ +# Base Kubernetes resources shared across ALL providers (EKS, GKE, etc.). +# Provider-specific resources go in overlays/<provider>/. 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - harbor-namespace.yaml + - nvidia-device-plugin.yaml + - registry-mirror-config.yaml + - git-cache/ diff --git a/osdc/base/kubernetes/nvidia-device-plugin.yaml b/osdc/base/kubernetes/base/nvidia-device-plugin.yaml similarity index 100% rename from osdc/base/kubernetes/nvidia-device-plugin.yaml rename to osdc/base/kubernetes/base/nvidia-device-plugin.yaml diff --git a/osdc/base/kubernetes/registry-mirror-config.yaml b/osdc/base/kubernetes/base/registry-mirror-config.yaml similarity index 100% rename from osdc/base/kubernetes/registry-mirror-config.yaml rename to osdc/base/kubernetes/base/registry-mirror-config.yaml diff --git a/osdc/base/kubernetes/kustomization.yaml b/osdc/base/kubernetes/kustomization.yaml index 8dfdd6a9..a5407ea3 100644 --- a/osdc/base/kubernetes/kustomization.yaml +++ b/osdc/base/kubernetes/kustomization.yaml @@ -1,16 +1,12 @@ # Base Kubernetes resources applied to EVERY cluster. -# These are cluster-agnostic and required regardless of which modules are enabled. +# Shared (provider-agnostic) resources live in base/. +# Provider-specific resources live in overlays/<provider>/. # -# Module-specific resources belong in osdc/<module>/kubernetes/ instead. +# This top-level kustomization points to the EKS overlay for backward +# compatibility — deploy-base's `kubectl apply -k base/kubernetes/` works +# unchanged. Future providers (GKE, AKS) use their own overlay directly. apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - storageclass-gp3.yaml - - node-performance-tuning.yaml - - nvidia-device-plugin.yaml - - harbor-namespace.yaml - - git-cache/ - - registry-mirror-config.yaml - # NOTE: Namespaces for modules (arc-runners, arc-systems, buildkit, etc.) - # are created by the module's own kubernetes/ directory, not here. 
+ - overlays/eks diff --git a/osdc/base/kubernetes/overlays/eks/kustomization.yaml b/osdc/base/kubernetes/overlays/eks/kustomization.yaml new file mode 100644 index 00000000..5705312d --- /dev/null +++ b/osdc/base/kubernetes/overlays/eks/kustomization.yaml @@ -0,0 +1,8 @@ +# EKS overlay: shared base + EKS-specific resources. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + - storageclass-gp3.yaml + - node-performance-tuning.yaml diff --git a/osdc/base/kubernetes/node-performance-tuning.yaml b/osdc/base/kubernetes/overlays/eks/node-performance-tuning.yaml similarity index 100% rename from osdc/base/kubernetes/node-performance-tuning.yaml rename to osdc/base/kubernetes/overlays/eks/node-performance-tuning.yaml diff --git a/osdc/base/kubernetes/storageclass-gp3.yaml b/osdc/base/kubernetes/overlays/eks/storageclass-gp3.yaml similarity index 100% rename from osdc/base/kubernetes/storageclass-gp3.yaml rename to osdc/base/kubernetes/overlays/eks/storageclass-gp3.yaml diff --git a/osdc/base/kubernetes/overlays/gke/kustomization.yaml b/osdc/base/kubernetes/overlays/gke/kustomization.yaml new file mode 100644 index 00000000..b6859541 --- /dev/null +++ b/osdc/base/kubernetes/overlays/gke/kustomization.yaml @@ -0,0 +1,8 @@ +# GKE overlay: shared base + GKE-specific resources. +# Consumer adds GKE-specific resources (StorageClass, etc.) via patches or +# by overriding this overlay in their own modules/ directory. 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base diff --git a/osdc/justfile b/osdc/justfile index 3a26ec74..cf0485c6 100644 --- a/osdc/justfile +++ b/osdc/justfile @@ -70,14 +70,31 @@ show cluster: echo " Tofu vars:"; \ uv run {{CFG}} {{cluster}} tfvars | tr ' ' '\n' | sed 's/^/ /' -# Update kubeconfig for a cluster (aws eks update-kubeconfig) +# Update kubeconfig for a cluster (supports EKS and GKE) kubeconfig cluster: - @export CLUSTERS_YAML="{{CLUSTERS_YAML}}"; \ - CNAME=$(uv run {{CFG}} {{cluster}} cluster_name); \ - REGION=$(uv run {{CFG}} {{cluster}} region); \ - echo "Updating kubeconfig for $CNAME ($REGION)..."; \ - NO_PROXY="${NO_PROXY:-},.eks.amazonaws.com" no_proxy="${no_proxy:-},.eks.amazonaws.com" \ - "{{UPSTREAM}}/scripts/kubeconfig-lock.sh" --name "$CNAME" --region "$REGION" --alias "$CNAME" + #!/usr/bin/env bash + set -euo pipefail + source "{{UPSTREAM}}/scripts/mise-activate.sh" + export CLUSTERS_YAML="{{CLUSTERS_YAML}}" + CLUSTER="{{cluster}}" + CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) + CNAME=$(uv run {{CFG}} "$CLUSTER" cluster_name) + REGION=$(uv run {{CFG}} "$CLUSTER" region) + if [[ "$CLOUD" == "gcp" ]]; then + PROJECT=$(uv run {{CFG}} "$CLUSTER" gcp_project) + echo "Updating kubeconfig for $CNAME ($REGION, GCP)..." + if gcloud container clusters describe "$CNAME" \ + --region "$REGION" --project "$PROJECT" >/dev/null 2>&1; then + gcloud container clusters get-credentials "$CNAME" \ + --region "$REGION" --project "$PROJECT" + else + echo " GCP cluster $CNAME not found yet — skipping (deploy will set kubeconfig)." + fi + else + echo "Updating kubeconfig for $CNAME ($REGION)..." 
+ NO_PROXY="${NO_PROXY:-},.eks.amazonaws.com" no_proxy="${no_proxy:-},.eks.amazonaws.com" \ + "{{UPSTREAM}}/scripts/kubeconfig-lock.sh" --name "$CNAME" --region "$REGION" --alias "$CNAME" + fi # ============================================================================ # BOOTSTRAP @@ -126,15 +143,20 @@ deploy cluster: [[ $REPLY =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; } fi - just deploy-base "$CLUSTER" + # deploy-base handles EKS base infra (terraform, harbor, base k8s). + # Non-AWS providers handle this in their provider module (e.g., gke). + CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) + if [[ "$CLOUD" == "aws" ]]; then + just deploy-base "$CLUSTER" + fi for module in $(uv run {{CFG}} "$CLUSTER" modules); do just deploy-module "$CLUSTER" "$module" done - # Recycle Karpenter nodes if configured (e.g., staging — ensures fresh userData/AMI) + # Recycle Karpenter nodes if configured (EKS only) RECYCLE=$(uv run {{CFG}} "$CLUSTER" recycle_karpenter_nodes false) - if [ "$RECYCLE" = "true" ]; then + if [[ "$CLOUD" == "aws" ]] && [ "$RECYCLE" = "true" ]; then echo "" echo "── Recycling Karpenter nodes (recycle_karpenter_nodes=true) ──" just recycle-nodes "$CLUSTER" @@ -146,10 +168,10 @@ deploy cluster: echo "DEPLOYMENT COMPLETE: $CLUSTER ($elapsed)" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - # Taint ARC runner nodes for graceful refresh (production only) + # Taint ARC runner nodes for graceful refresh (EKS/Karpenter only) # When nodes are recycled (staging), tainting is pointless — they're already being destroyed. # OSDC_TAINT_NODES=yes|no|ask (default: ask) - if [ "$RECYCLE" != "true" ]; then + if [[ "$CLOUD" == "aws" ]] && [ "$RECYCLE" != "true" ]; then TAINT="${OSDC_TAINT_NODES:-ask}" if [ "$TAINT" = "yes" ]; then echo "" @@ -331,12 +353,19 @@ deploy-module cluster module: if [[ -f "$MODULE_DIR/terraform/main.tf" ]]; then echo " Applying terraform for $MODULE..." 
BUCKET=$(uv run {{CFG}} "$CLUSTER" state_bucket) + CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) cd "$MODULE_DIR/terraform" - tofu init -reconfigure \ - -backend-config="bucket=${BUCKET}" \ - -backend-config="key=${CLUSTER}/${MODULE}/terraform.tfstate" \ - -backend-config="region=us-west-2" \ - -backend-config="dynamodb_table=ciforge-terraform-locks" + if [[ "$CLOUD" == "gcp" ]]; then + tofu init -reconfigure \ + -backend-config="bucket=${BUCKET}" \ + -backend-config="prefix=${CLUSTER}/${MODULE}" + else + tofu init -reconfigure \ + -backend-config="bucket=${BUCKET}" \ + -backend-config="key=${CLUSTER}/${MODULE}/terraform.tfstate" \ + -backend-config="region=us-west-2" \ + -backend-config="dynamodb_table=ciforge-terraform-locks" + fi # Modules get cluster_name and aws_region as minimum vars set +e diff --git a/osdc/scripts/bootstrap-state-gcp.sh b/osdc/scripts/bootstrap-state-gcp.sh new file mode 100755 index 00000000..a548ae1a --- /dev/null +++ b/osdc/scripts/bootstrap-state-gcp.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail +# +# Bootstrap GCS state bucket for a GCP cluster. +# +# Creates: +# - GCS bucket for state storage (versioned, uniform IAM) +# +# GCS provides native state locking — no DynamoDB equivalent needed. +# +# Usage: +# ./scripts/bootstrap-state-gcp.sh <cluster-id> +# +# Idempotent: safe to run multiple times. 
+ +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=/dev/null +source "$SCRIPT_DIR/mise-activate.sh" +CONFIG_PY="$SCRIPT_DIR/cluster-config.py" + +CLUSTER="${1:?Usage: $0 <cluster-id>}" +PROJECT=$(uv run "$CONFIG_PY" "$CLUSTER" gcp_project) +BUCKET=$(uv run "$CONFIG_PY" "$CLUSTER" state_bucket) +REGION=$(uv run "$CONFIG_PY" "$CLUSTER" region) + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "Bootstrapping state for: $CLUSTER" +echo " Bucket: $BUCKET (region: $REGION)" +echo " Project: $PROJECT" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +if gcloud storage buckets describe "gs://${BUCKET}" --project="${PROJECT}" >/dev/null 2>&1; then + echo " Bucket '${BUCKET}' already exists, skipping create." +else + echo " Creating bucket '${BUCKET}'..." + gcloud storage buckets create "gs://${BUCKET}" \ + --project="${PROJECT}" \ + --location="${REGION}" \ + --uniform-bucket-level-access +fi + +echo " Enabling versioning..." +gcloud storage buckets update "gs://${BUCKET}" --versioning + +echo " Done." +echo "" +echo "State bootstrapping complete." diff --git a/osdc/scripts/bootstrap-state.sh b/osdc/scripts/bootstrap-state.sh index 4c3dd5cf..793b8107 100755 --- a/osdc/scripts/bootstrap-state.sh +++ b/osdc/scripts/bootstrap-state.sh @@ -24,6 +24,15 @@ STATE_REGION="us-west-2" bootstrap_cluster() { local cluster_id="$1" + local cloud + cloud=$(uv run "$CONFIG_PY" "$cluster_id" cloud aws) + + # Non-AWS providers have their own bootstrap script + if [[ "$cloud" != "aws" ]]; then + "$SCRIPT_DIR/bootstrap-state-${cloud}.sh" "$cluster_id" + return + fi + local bucket bucket=$(uv run "$CONFIG_PY" "$cluster_id" state_bucket) From 995b1971a0a192a10d8f1c2751f85d054d5d1501 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 6 Apr 2026 12:40:23 -0400 Subject: [PATCH 2/3] Revert justfile multi-cloud changes Keep only restructure + bootstrap for now. Justfile multi-cloud routing (kubeconfig, deploy, deploy-module) will come in a later PR. 
--- osdc/justfile | 63 ++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 46 deletions(-) diff --git a/osdc/justfile b/osdc/justfile index cf0485c6..3a26ec74 100644 --- a/osdc/justfile +++ b/osdc/justfile @@ -70,31 +70,14 @@ show cluster: echo " Tofu vars:"; \ uv run {{CFG}} {{cluster}} tfvars | tr ' ' '\n' | sed 's/^/ /' -# Update kubeconfig for a cluster (supports EKS and GKE) +# Update kubeconfig for a cluster (aws eks update-kubeconfig) kubeconfig cluster: - #!/usr/bin/env bash - set -euo pipefail - source "{{UPSTREAM}}/scripts/mise-activate.sh" - export CLUSTERS_YAML="{{CLUSTERS_YAML}}" - CLUSTER="{{cluster}}" - CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) - CNAME=$(uv run {{CFG}} "$CLUSTER" cluster_name) - REGION=$(uv run {{CFG}} "$CLUSTER" region) - if [[ "$CLOUD" == "gcp" ]]; then - PROJECT=$(uv run {{CFG}} "$CLUSTER" gcp_project) - echo "Updating kubeconfig for $CNAME ($REGION, GCP)..." - if gcloud container clusters describe "$CNAME" \ - --region "$REGION" --project "$PROJECT" >/dev/null 2>&1; then - gcloud container clusters get-credentials "$CNAME" \ - --region "$REGION" --project "$PROJECT" - else - echo " GCP cluster $CNAME not found yet — skipping (deploy will set kubeconfig)." - fi - else - echo "Updating kubeconfig for $CNAME ($REGION)..." 
- NO_PROXY="${NO_PROXY:-},.eks.amazonaws.com" no_proxy="${no_proxy:-},.eks.amazonaws.com" \ - "{{UPSTREAM}}/scripts/kubeconfig-lock.sh" --name "$CNAME" --region "$REGION" --alias "$CNAME" - fi + @export CLUSTERS_YAML="{{CLUSTERS_YAML}}"; \ + CNAME=$(uv run {{CFG}} {{cluster}} cluster_name); \ + REGION=$(uv run {{CFG}} {{cluster}} region); \ + echo "Updating kubeconfig for $CNAME ($REGION)..."; \ + NO_PROXY="${NO_PROXY:-},.eks.amazonaws.com" no_proxy="${no_proxy:-},.eks.amazonaws.com" \ + "{{UPSTREAM}}/scripts/kubeconfig-lock.sh" --name "$CNAME" --region "$REGION" --alias "$CNAME" # ============================================================================ # BOOTSTRAP @@ -143,20 +126,15 @@ deploy cluster: [[ $REPLY =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 1; } fi - # deploy-base handles EKS base infra (terraform, harbor, base k8s). - # Non-AWS providers handle this in their provider module (e.g., gke). - CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) - if [[ "$CLOUD" == "aws" ]]; then - just deploy-base "$CLUSTER" - fi + just deploy-base "$CLUSTER" for module in $(uv run {{CFG}} "$CLUSTER" modules); do just deploy-module "$CLUSTER" "$module" done - # Recycle Karpenter nodes if configured (EKS only) + # Recycle Karpenter nodes if configured (e.g., staging — ensures fresh userData/AMI) RECYCLE=$(uv run {{CFG}} "$CLUSTER" recycle_karpenter_nodes false) - if [[ "$CLOUD" == "aws" ]] && [ "$RECYCLE" = "true" ]; then + if [ "$RECYCLE" = "true" ]; then echo "" echo "── Recycling Karpenter nodes (recycle_karpenter_nodes=true) ──" just recycle-nodes "$CLUSTER" @@ -168,10 +146,10 @@ deploy cluster: echo "DEPLOYMENT COMPLETE: $CLUSTER ($elapsed)" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - # Taint ARC runner nodes for graceful refresh (EKS/Karpenter only) + # Taint ARC runner nodes for graceful refresh (production only) # When nodes are recycled (staging), tainting is pointless — they're already being destroyed. 
# OSDC_TAINT_NODES=yes|no|ask (default: ask) - if [[ "$CLOUD" == "aws" ]] && [ "$RECYCLE" != "true" ]; then + if [ "$RECYCLE" != "true" ]; then TAINT="${OSDC_TAINT_NODES:-ask}" if [ "$TAINT" = "yes" ]; then echo "" @@ -353,19 +331,12 @@ deploy-module cluster module: if [[ -f "$MODULE_DIR/terraform/main.tf" ]]; then echo " Applying terraform for $MODULE..." BUCKET=$(uv run {{CFG}} "$CLUSTER" state_bucket) - CLOUD=$(uv run {{CFG}} "$CLUSTER" cloud aws) cd "$MODULE_DIR/terraform" - if [[ "$CLOUD" == "gcp" ]]; then - tofu init -reconfigure \ - -backend-config="bucket=${BUCKET}" \ - -backend-config="prefix=${CLUSTER}/${MODULE}" - else - tofu init -reconfigure \ - -backend-config="bucket=${BUCKET}" \ - -backend-config="key=${CLUSTER}/${MODULE}/terraform.tfstate" \ - -backend-config="region=us-west-2" \ - -backend-config="dynamodb_table=ciforge-terraform-locks" - fi + tofu init -reconfigure \ + -backend-config="bucket=${BUCKET}" \ + -backend-config="key=${CLUSTER}/${MODULE}/terraform.tfstate" \ + -backend-config="region=us-west-2" \ + -backend-config="dynamodb_table=ciforge-terraform-locks" # Modules get cluster_name and aws_region as minimum vars set +e From 231e6ec1cab5bdcbdbaa2ff2fd7ccf70cc44ca27 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 6 Apr 2026 12:49:58 -0400 Subject: [PATCH 3/3] metrcs cut --- osdc/justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osdc/justfile b/osdc/justfile index 3a26ec74..48ac7cd7 100644 --- a/osdc/justfile +++ b/osdc/justfile @@ -83,7 +83,7 @@ kubeconfig cluster: # BOOTSTRAP # ============================================================================ -# Bootstrap S3 state bucket + DynamoDB lock table for a cluster +# Bootstrap remote state storage for a cluster (S3/GCS, auto-detected) bootstrap cluster: @OSDC_ROOT="{{ROOT}}" OSDC_UPSTREAM="{{UPSTREAM}}" CLUSTERS_YAML="{{CLUSTERS_YAML}}" {{SCRIPTS}}/bootstrap-state.sh {{cluster}}