From 050f141470cc792181480b54958475261bb4de50 Mon Sep 17 00:00:00 2001
From: Betty Murphy <52879@indianschool.bh>
Date: Fri, 24 Apr 2026 23:45:45 +0800
Subject: [PATCH 1/7] feat: add GKE Terraform configuration for sifnode deployment

---
 deploy/terraform/providers/gke/main.tf | 291 +++++++++++++++++++++++++
 1 file changed, 291 insertions(+)
 create mode 100644 deploy/terraform/providers/gke/main.tf

diff --git a/deploy/terraform/providers/gke/main.tf b/deploy/terraform/providers/gke/main.tf
new file mode 100644
index 0000000000..11a89d697d
--- /dev/null
+++ b/deploy/terraform/providers/gke/main.tf
@@ -0,0 +1,291 @@
+terraform {
+  required_version = ">= 0.13"
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
+
+provider "google-beta" {
+  project = var.project_id
+  region  = var.region
+}
+
+provider "kubernetes" {
+  host                   = "https://${google_container_cluster.primary.endpoint}"
+  cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth.0.cluster_ca_certificate)
+  token                  = data.google_client_config.current.access_token
+}
+
+data "google_client_config" "current" {}
+
+resource "google_service_account" "sifnode" {
+  account_id   = "${var.cluster_name}-sa"
+  display_name = "sifnode GKE Service Account"
+  description  = "Service account for sifnode GKE cluster operations"
+}
+
+resource "google_project_iam_member" "sifnode_permissions" {
+  for_each = toset([
+    "roles/container.admin",
+    "roles/compute.admin",
+    "roles/iam.serviceAccountUser",
+    "roles/storage.admin"
+  ])
+  project = var.project_id
+  role    = each.key
+  member  = "serviceAccount:${google_service_account.sifnode.email}"
+}
+
+resource "google_container_cluster" "primary" {
+  provider = google-beta
+  name     = var.cluster_name
+  location = var.region
+
+  remove_default_node_pool = true
+  initial_node_count       = var.initial_node_count
+
+  network    = google_compute_network.vpc.name
+  subnetwork = google_compute_subnetwork.subnet.name
+
+  master_auth {
+    client_certificate_config {
+      issue_client_certificate = false
+    }
+  }
+
+  ip_allocation_policy {
+    cluster_secondary_range_name  = "pods"
+    services_secondary_range_name = "services"
+  }
+
+  private_cluster_config {
+    enable_private_nodes    = var.enable_private_nodes
+    enable_private_endpoint = false
+    master_ipv4_cidr_block  = var.master_ipv4_cidr
+  }
+
+  master_authorized_networks_config {
+    dynamic "cidr_blocks" {
+      for_each = var.authorized_cidrs
+      content {
+        cidr_block   = cidr_blocks.value
+        display_name = "authorized-${cidr_blocks.key}"
+      }
+    }
+  }
+
+  workload_identity_config {
+    workload_pool = "${var.project_id}.svc.id.goog"
+  }
+
+  cluster_autoscaling {
+    enabled = var.enable_autoscaling
+    resource_limits {
+      resource_type = "cpu"
+      minimum       = var.min_cpu
+      maximum       = var.max_cpu
+    }
+    resource_limits {
+      resource_type = "memory"
+      minimum       = var.min_memory_mb
+      maximum       = var.max_memory_mb
+    }
+  }
+
+  node_pool_defaults {
+    node_config_defaults {
+      image_type = var.image_type
+    }
+  }
+
+  release_channel {
+    channel = var.release_channel
+  }
+
+  maintenance_policy {
+    daily_maintenance_window {
+      start_time = var.maintenance_window
+    }
+  }
+
+  resource_labels = var.tags
+
+  lifecycle {
+    ignore_changes = [
+      node_pool,
+      initial_node_count
+    ]
+  }
+}
+
+resource "google_compute_network" "vpc" {
+  name                    = "${var.cluster_name}-vpc"
+  auto_create_subnetworks = false
+}
+
+resource "google_compute_subnetwork" "subnet" {
+  name          = "${var.cluster_name}-subnet"
+  ip_cidr_range = var.subnet_cidr
+  region        = var.region
+  network       = google_compute_network.vpc.id
+
+  secondary_ip_range = [
+    {
+      
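# Secondary ranges referenced by ip_allocation_policy in the cluster
+      # resource above; the defaults (10.4.0.0/14 pods, 10.8.0.0/20 services)
+      # must not overlap the primary subnet_cidr (10.0.0.0/20).
+      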
range_name = "pods" + ip_cidr_range = var.pods_cidr + }, + { + range_name = "services" + ip_cidr_range = var.services_cidr + } + ] +} + +resource "google_compute_router" "router" { + name = "${var.cluster_name}-router" + region = var.region + network = google_compute_network.vpc.id +} + +resource "google_compute_router_nat" "nat" { + name = "${var.cluster_name}-nat" + router = google_compute_router.router.name + region = var.region + nat_ip_allocate_option = "AUTO_ONLY" + source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES" +} + +resource "google_container_node_pool" "primary_nodes" { + name = "${var.cluster_name}-node-pool" + cluster = google_container_cluster.primary.id + location = var.region + node_count = var.desired_capacity + + autoscaling { + min_node_count = var.min_capacity + max_node_count = var.max_capacity + } + + management { + auto_repair = true + auto_upgrade = true + } + + node_config { + machine_type = var.machine_type + disk_size_gb = var.disk_size + disk_type = var.disk_type + image_type = var.image_type + + service_account = google_service_account.sifnode.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring.write" + ] + + labels = merge(var.tags, { + role = "sifnode" + }) + + tags = ["sifnode", var.cluster_name] + + workload_metadata_config { + mode = "GKE_METADATA" + } + } + + lifecycle { + ignore_changes = [node_count] + } +} + +resource "google_compute_firewall" "sifnode" { + name = "${var.cluster_name}-sifnode" + network = google_compute_network.vpc.name + + allow { + protocol = "tcp" + ports = ["26656", "26657", "1317", "9090"] + } + + source_ranges = ["0.0.0.0/0"] + target_tags = ["sifnode", var.cluster_name] +} + +resource "google_compute_firewall" "ssh" { + name = "${var.cluster_name}-ssh" + network = google_compute_network.vpc.name + + allow { + protocol = "tcp" + ports = ["22"] + } + + source_ranges = var.ssh_source_ranges + target_tags = ["sifnode"] +} + +resource "google_compute_disk" "sifnoded_data" { + name = "${var.cluster_name}-sifnoded-data" + type = "pd-ssd" + zone = var.default_zone + size = var.data_disk_size + labels = var.tags +} + +resource "google_compute_disk" "sifnodecli_data" { + name = "${var.cluster_name}-sifnodecli-data" + type = "pd-ssd" + zone = var.default_zone + size = var.data_disk_size + labels = var.tags +} + +resource "kubernetes_storage_class" "ssd" { + metadata { + name = "gke-ssd" + } + storage_provisioner = "kubernetes.io/gce-pd" + parameters = { + type = "pd-ssd" + } + allow_volume_expansion = true + reclaim_policy = "Retain" +} + +resource "kubernetes_persistent_volume_claim" "sifnoded" { + metadata { + name = "sifnoded-pvc" + namespace = var.namespace + labels = var.tags + } + spec { + access_modes = ["ReadWriteOnce"] + resources { + requests = { + storage = var.storage_size + } + } + storage_class_name = kubernetes_storage_class.ssd.metadata[0].name + } +} + +resource "kubernetes_persistent_volume_claim" "sifnodecli" { + metadata { + name = "sifnodecli-pvc" + namespace = var.namespace + labels = var.tags + } + spec { + access_modes = ["ReadWriteOnce"] + resources { + requests = { + storage = var.storage_size + } + } + storage_class_name = kubernetes_storage_class.ssd.metadata[0].name + } +} From f253e56516b809f11d4011c5e3940d9bd4feff61 Mon Sep 17 00:00:00 2001 From: Betty Murphy <52879@indianschool.bh> Date: Fri, 24 Apr 2026 
23:45:52 +0800 Subject: [PATCH 2/7] Add GKE Terraform variables --- deploy/terraform/providers/gke/variables.tf | 170 ++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 deploy/terraform/providers/gke/variables.tf diff --git a/deploy/terraform/providers/gke/variables.tf b/deploy/terraform/providers/gke/variables.tf new file mode 100644 index 0000000000..bf5d4adde9 --- /dev/null +++ b/deploy/terraform/providers/gke/variables.tf @@ -0,0 +1,170 @@ +variable "project_id" { + description = "GCP project ID" + type = string +} + +variable "region" { + description = "GCP region" + type = string + default = "us-central1" +} + +variable "default_zone" { + description = "Default GCP zone for zonal resources" + type = string + default = "us-central1-a" +} + +variable "cluster_name" { + description = "GKE cluster name" + type = string +} + +variable "namespace" { + description = "Kubernetes namespace" + type = string + default = "sifnode" +} + +variable "cluster_version" { + description = "GKE cluster version (use 'latest' for current default)" + type = string + default = "latest" +} + +variable "release_channel" { + description = "GKE release channel (REGULAR, STABLE, RAPID)" + type = string + default = "REGULAR" +} + +variable "subnet_cidr" { + description = "Subnet CIDR range" + type = string + default = "10.0.0.0/20" +} + +variable "pods_cidr" { + description = "CIDR range for pods" + type = string + default = "10.4.0.0/14" +} + +variable "services_cidr" { + description = "CIDR range for services" + type = string + default = "10.8.0.0/20" +} + +variable "master_ipv4_cidr" { + description = "CIDR range for the GKE master" + type = string + default = "172.16.0.0/28" +} + +variable "authorized_cidrs" { + description = "Map of authorized CIDR blocks for master access" + type = map(string) + default = { + "default" = "0.0.0.0/0" + } +} + +variable "enable_private_nodes" { + description = "Enable private nodes" + type = bool + default = false +} + +variable "enable_autoscaling" { + description = "Enable cluster autoscaling" + type = bool + default = true +} + +variable "initial_node_count" { + description = "Initial number of nodes" + default = 1 +} + +variable "desired_capacity" { + description = "Desired node count" + default = 1 +} + +variable "min_capacity" { + description = "Minimum node count" + default = 1 +} + +variable "max_capacity" { + description = "Maximum node count" + default = 5 +} + +variable "machine_type" { + description = "GCE machine type for nodes" + default = "e2-standard-2" +} + +variable "image_type" { + description = "Node image type" + default = "COS_CONTAINERD" +} + +variable "disk_size" { + description = "Node boot disk size in GB" + default = 100 +} + +variable "disk_type" { + description = "Node boot disk type" + default = "pd-standard" +} + +variable "data_disk_size" { + description = "Persistent data disk size in GB" + default = 50 +} + +variable "storage_size" { + description = "PVC storage size" + default = "50Gi" +} + +variable "min_cpu" { + description = "Minimum CPU for autoscaler" + default = 1 +} + +variable "max_cpu" { + description = "Maximum CPU for autoscaler" + default = 8 +} + +variable "min_memory_mb" { + description = "Minimum memory in MB for autoscaler" + default = 4096 +} + +variable "max_memory_mb" { + description = "Maximum memory in MB for autoscaler" + default = 32768 +} + +variable "maintenance_window" { + description = "Daily maintenance window start time (UTC)" + default = "03:00" +} + +variable "ssh_source_ranges" { + 
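# NOTE: the default below allows SSH from anywhere (0.0.0.0/0);
+  # restrict it to trusted admin CIDRs for anything non-disposable.
+  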
description = "CIDR ranges allowed for SSH access" + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "tags" { + description = "Tags to apply to resources" + type = map(string) + default = {} +} From 096f530d9a3cfdb814f65004c6efc94427df012c Mon Sep 17 00:00:00 2001 From: Betty Murphy <52879@indianschool.bh> Date: Fri, 24 Apr 2026 23:46:00 +0800 Subject: [PATCH 3/7] Add GKE Terraform outputs --- deploy/terraform/providers/gke/outputs.tf | 70 +++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 deploy/terraform/providers/gke/outputs.tf diff --git a/deploy/terraform/providers/gke/outputs.tf b/deploy/terraform/providers/gke/outputs.tf new file mode 100644 index 0000000000..e84ab2204e --- /dev/null +++ b/deploy/terraform/providers/gke/outputs.tf @@ -0,0 +1,70 @@ +output "cluster_name" { + description = "GKE cluster name" + value = google_container_cluster.primary.name +} + +output "cluster_location" { + description = "GKE cluster location" + value = google_container_cluster.primary.location +} + +output "cluster_endpoint" { + description = "GKE cluster endpoint" + value = google_container_cluster.primary.endpoint +} + +output "cluster_ca_certificate" { + description = "GKE cluster CA certificate" + value = base64decode(google_container_cluster.primary.master_auth.0.cluster_ca_certificate) + sensitive = true +} + +output "kubectl_config" { + description = "kubectl configuration command" + value = "gcloud container clusters get-credentials ${google_container_cluster.primary.name} --region ${var.region} --project ${var.project_id}" +} + +output "cluster_region" { + description = "GCP region" + value = var.region +} + +output "service_account" { + description = "GKE service account email" + value = google_service_account.sifnode.email +} + +output "network_name" { + description = "VPC network name" + value = google_compute_network.vpc.name +} + +output "subnet_name" { + description = "Subnet name" + value = google_compute_subnetwork.subnet.name +} + +output "node_pool_name" { + description = "Primary node pool name" + value = google_container_node_pool.primary_nodes.name +} + +output "storage_class" { + description = "SSD storage class name" + value = kubernetes_storage_class.ssd.metadata[0].name +} + +output "helm_install_command" { + description = "Helm install command for sifnode on GKE" + value = <<-EOT + # After configuring kubectl context: + ${google_container_cluster.primary.name} + + # Install sifnode via Helm: + helm install sifnode deploy/helm/sifnode \\ + --namespace sifnode \\ + --create-namespace \\ + --set provider=gke \\ + --set persistence.storageClass=gke-ssd + EOT +} From b16c88fcdc891e708e5a40a186b58925ed7bb7c1 Mon Sep 17 00:00:00 2001 From: Betty Murphy <52879@indianschool.bh> Date: Fri, 24 Apr 2026 23:47:08 +0800 Subject: [PATCH 4/7] Add GKE-specific Helm values --- deploy/helm/sifnode/values_gke.yaml | 50 +++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 deploy/helm/sifnode/values_gke.yaml diff --git a/deploy/helm/sifnode/values_gke.yaml b/deploy/helm/sifnode/values_gke.yaml new file mode 100644 index 0000000000..a3d4d18e01 --- /dev/null +++ b/deploy/helm/sifnode/values_gke.yaml @@ -0,0 +1,50 @@ +# GKE-specific values for sifnode Helm chart +# Override with: helm install -f values_gke.yaml + +# Deployment environment: "gke" or "aws" (default: aws for backward compatibility) +provider: gke + +# GKE-specific persistence configuration +persistence: + enabled: true + size: 50Gi + accessMode: ReadWriteOnce + 
storageClass: gke-ssd # Created by GKE Terraform + +# GKE node configuration +nodeSelector: + cloud.google.com/gke-nodepool: sifnode-pool + +# GKE service configuration +service: + type: LoadBalancer + port: 26656 + annotations: + cloud.google.com/load-balancer-type: Internal # Change to External if needed + +# GKE ingress configuration (optional) +ingress: + enabled: false + annotations: + kubernetes.io/ingress.class: gce + +# GKE IAM workload identity +serviceAccount: + annotations: + iam.gke.io/gcp-service-account: "" # Set to your GCP SA email + +# Resource allocation for GKE (e2-standard-2) +resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "2" + memory: "4Gi" + +# GKE autoscaling +autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 3 + targetCPUUtilizationPercentage: 80 From 53cb8142136f0fc3d9935121b13c9d3174034ca5 Mon Sep 17 00:00:00 2001 From: Betty Murphy <52879@indianschool.bh> Date: Fri, 24 Apr 2026 23:47:14 +0800 Subject: [PATCH 5/7] Add GKE deployment automation script --- deploy/gke/deploy_gke.sh | 193 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 deploy/gke/deploy_gke.sh diff --git a/deploy/gke/deploy_gke.sh b/deploy/gke/deploy_gke.sh new file mode 100644 index 0000000000..c850e98062 --- /dev/null +++ b/deploy/gke/deploy_gke.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# ============================================================= +# sifnode GKE Deployment Script +# ============================================================= +# Usage: +# ./deploy_gke.sh create # Full deployment +# ./deploy_gke.sh destroy # Tear down +# ./deploy_gke.sh update # Update Helm release +# ./deploy_gke.sh status # Check deployment status +# ./deploy_gke.sh -p ... # Specify GCP project +# ./deploy_gke.sh -r ... # Specify region +# ============================================================= + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(dirname "$SCRIPT_DIR")" + +# ── Defaults ────────────────────────────────────────────────── +PROJECT_ID="${GCP_PROJECT_ID:-}" +REGION="${GCP_REGION:-us-central1}" +CLUSTER_NAME="${CLUSTER_NAME:-sifnode-gke}" +NAMESPACE="${NAMESPACE:-sifnode}" +DEPLOY_ENV="${DEPLOY_ENV:-gke}" +TERRAFORM_DIR="${PROJECT_DIR}/deploy/terraform/providers/gke" +HELM_CHART="${PROJECT_DIR}/deploy/helm/sifnode" +HELM_VALUES="${PROJECT_DIR}/deploy/helm/sifnode/values_gke.yaml" + +# ── Colors ──────────────────────────────────────────────────── +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' +CYAN='\033[0;36m'; NC='\033[0m' +info() { echo -e "${CYAN}[INFO]${NC} $1"; } +ok() { echo -e "${GREEN}[OK]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +err() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } + +# ── Prerequisites Check ────────────────────────────────────── +check_prereqs() { + info "Checking prerequisites..." + command -v gcloud >/dev/null 2>&1 || err "gcloud CLI not installed. Install: https://cloud.google.com/sdk/docs/install" + command -v terraform >/dev/null 2>&1 || err "Terraform not installed. Install: https://learn.hashicorp.com/tutorials/terraform/install-cli" + command -v kubectl >/dev/null 2>&1 || err "kubectl not installed." + command -v helm >/dev/null 2>&1 || err "Helm not installed." + + if [[ -z "$PROJECT_ID" ]]; then + PROJECT_ID=$(gcloud config get-value project 2>/dev/null || true) + if [[ -z "$PROJECT_ID" ]]; then + err "GCP project ID not set. Use -p or set GCP_PROJECT_ID env var." + fi + fi + ok "Prerequisites satisfied. 
Project: ${PROJECT_ID}, Region: ${REGION}" +} + +# ── Terraform ───────────────────────────────────────────────── +tf_init() { + info "Initializing Terraform (GKE)..." + cd "$TERRAFORM_DIR" + terraform init -upgrade + ok "Terraform initialized" +} + +tf_apply() { + info "Applying Terraform (GKE)..." + cd "$TERRAFORM_DIR" + terraform apply -auto-approve \ + -var="project_id=${PROJECT_ID}" \ + -var="region=${REGION}" \ + -var="cluster_name=${CLUSTER_NAME}" + ok "Terraform applied successfully" +} + +tf_destroy() { + warn "Destroying GKE infrastructure..." + cd "$TERRAFORM_DIR" + terraform destroy -auto-approve \ + -var="project_id=${PROJECT_ID}" \ + -var="region=${REGION}" \ + -var="cluster_name=${CLUSTER_NAME}" + ok "GKE infrastructure destroyed" +} + +# ── Kubernetes Context ─────────────────────────────────────── +setup_kubectl() { + info "Configuring kubectl..." + gcloud container clusters get-credentials "$CLUSTER_NAME" \ + --region "$REGION" \ + --project "$PROJECT_ID" + kubectl config set-context --current --namespace="$NAMESPACE" 2>/dev/null || true + ok "kubectl configured for cluster: ${CLUSTER_NAME}" +} + +# ── Helm Deployment ─────────────────────────────────────────── +helm_install() { + info "Installing/Upgrading sifnode via Helm..." + kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - + + helm upgrade --install sifnode "$HELM_CHART" \ + --namespace "$NAMESPACE" \ + --create-namespace \ + --values "${HELM_VALUES}" \ + --set provider=gke \ + --set persistence.storageClass=gke-ssd \ + --wait \ + --timeout 15m + ok "sifnode deployed to GKE" +} + +helm_uninstall() { + info "Uninstalling sifnode Helm release..." + helm uninstall sifnode --namespace "$NAMESPACE" 2>/dev/null || true + kubectl delete namespace "$NAMESPACE" --ignore-not-found + ok "sifnode uninstalled" +} + +# ── Status ──────────────────────────────────────────────────── +check_status() { + info "=== GKE Cluster Status ===" + gcloud container clusters describe "$CLUSTER_NAME" \ + --region "$REGION" \ + --project "$PROJECT_ID" \ + --format="table(name, location, status, currentMasterVersion, currentNodeVersion, currentNodeCount)" + + info "=== Pod Status ===" + kubectl get pods -n "$NAMESPACE" -o wide 2>/dev/null || echo "No pods found" + + info "=== Service Status ===" + kubectl get svc -n "$NAMESPACE" 2>/dev/null || echo "No services found" + + info "=== Persistent Volume Claims ===" + kubectl get pvc -n "$NAMESPACE" 2>/dev/null || echo "No PVCs found" +} + +# ── CLI Argument Parsing ───────────────────────────────────── +while [[ $# -gt 0 ]]; do + case "$1" in + -p|--project) PROJECT_ID="$2"; shift 2 ;; + -r|--region) REGION="$2"; shift 2 ;; + -c|--cluster) CLUSTER_NAME="$2"; shift 2 ;; + create|deploy) + check_prereqs + tf_init && tf_apply + setup_kubectl + helm_install + check_status + info "✅ sifnode deployed to GKE successfully!" + info " Cluster: ${CLUSTER_NAME} (${REGION})" + info " Namespace: ${NAMESPACE}" + info " To check logs: kubectl logs -n ${NAMESPACE} -l app=sifnode" + exit 0 + ;; + destroy|teardown) + check_prereqs + helm_uninstall + tf_destroy + info "✅ GKE deployment destroyed." + exit 0 + ;; + update) + check_prereqs + setup_kubectl + helm_install + check_status + exit 0 + ;; + status) + check_prereqs + setup_kubectl + check_status + exit 0 + ;; + --help|-h) + head -20 "$0" | grep "^#" + exit 0 + ;; + *) err "Unknown option: $1. Use --help for usage." 
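# NOTE: flags are parsed only when they appear before the command word.
+       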
;;
+  esac
+done
+
+# Interactive mode: show menu
+echo "sifnode GKE Deployment Script"
+echo "1) Full deployment (create)"
+echo "2) Update only (update)"
+echo "3) Destroy (destroy)"
+echo "4) Status (status)"
+echo "Choose (1-4): "
+read -r choice
+case "$choice" in
+  1) "$0" create ;;
+  2) "$0" update ;;
+  3) "$0" destroy ;;
+  4) "$0" status ;;
+  *) err "Invalid choice" ;;
+esac

From e23ad38b9eb5022a9fabf43cc86fab3ef3af6e31 Mon Sep 17 00:00:00 2001
From: Betty Murphy <52879@indianschool.bh>
Date: Fri, 24 Apr 2026 23:47:27 +0800
Subject: [PATCH 6/7] Add provider field to Helm values for multi-cloud support

---
 deploy/helm/sifnode/values.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 deploy/helm/sifnode/values.yaml

diff --git a/deploy/helm/sifnode/values.yaml b/deploy/helm/sifnode/values.yaml
new file mode 100644
index 0000000000..541bfdffc9
--- /dev/null
+++ b/deploy/helm/sifnode/values.yaml
@@ -0,0 +1,6 @@
+# sifnode Helm Chart Values
+# ── Provider ───────────────────────────────────────────────
+# Deployment environment: "aws" (default) or "gke"
+provider: aws
+
+

From d1eec6c35e1c677f35a902d4ae3b913e9e0cd98b Mon Sep 17 00:00:00 2001
From: Betty Murphy <52879@indianschool.bh>
Date: Fri, 24 Apr 2026 23:47:34 +0800
Subject: [PATCH 7/7] Add GKE deployment documentation

---
 docs/gke-deployment.md | 172 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 docs/gke-deployment.md

diff --git a/docs/gke-deployment.md b/docs/gke-deployment.md
new file mode 100644
index 0000000000..2961dd9fb6
--- /dev/null
+++ b/docs/gke-deployment.md
@@ -0,0 +1,172 @@
+# Deploying sifnode on Google Kubernetes Engine (GKE)
+
+This guide walks through deploying a sifnode validator on **Google Kubernetes Engine (GKE)**.
+
+## Prerequisites
+
+| Requirement | Version | Install |
+|-------------|---------|---------|
+| gcloud CLI | latest | [Install](https://cloud.google.com/sdk/docs/install) |
+| Terraform | >= 0.13 | [Install](https://learn.hashicorp.com/tutorials/terraform/install-cli) |
+| kubectl | >= 1.18 | `gcloud components install kubectl` |
+| Helm 3 | >= 3.0 | [Install](https://helm.sh/docs/intro/install/) |
+
+## Quick Start
+
+### 1. Authenticate with GCP
+
+```bash
+gcloud auth login
+gcloud config set project YOUR_PROJECT_ID
+```
+
+### 2. Deploy Infrastructure
+
+```bash
+# Clone the repository
+git clone https://github.com/Sifchain/sifnode.git
+cd sifnode
+
+# Deploy with the automation script (flags go before the command)
+./deploy/gke/deploy_gke.sh -p YOUR_PROJECT_ID -r us-central1 create
+```
+
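+The script also reads `GCP_PROJECT_ID`, `GCP_REGION`, and `CLUSTER_NAME` from
+the environment, so an equivalent invocation is:
+
+```bash
+export GCP_PROJECT_ID=YOUR_PROJECT_ID
+export GCP_REGION=us-central1
+./deploy/gke/deploy_gke.sh create
+```
+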
+### 3. Verify Deployment
+
+```bash
+# Check cluster status
+./deploy/gke/deploy_gke.sh status
+
+# Check pod logs
+kubectl logs -n sifnode -l app=sifnode
+
+# Check sifnoded status
+kubectl exec -n sifnode deploy/sifnode -- sifnoded status
+```
+
+## Manual Deployment (Step by Step)
+
+### Cluster Setup
+
+```bash
+cd deploy/terraform/providers/gke
+
+terraform init
+terraform apply \
+  -var="project_id=YOUR_PROJECT_ID" \
+  -var="region=us-central1" \
+  -var="cluster_name=sifnode-gke"
+```
+
+### Configure kubectl
+
+```bash
+gcloud container clusters get-credentials sifnode-gke \
+  --region us-central1 \
+  --project YOUR_PROJECT_ID
+```
+
+### Install sifnode via Helm
+
+```bash
+# Create namespace
+kubectl create namespace sifnode
+
+# Install with GKE-specific values
+helm install sifnode deploy/helm/sifnode \
+  --namespace sifnode \
+  --create-namespace \
+  --values deploy/helm/sifnode/values_gke.yaml \
+  --set provider=gke \
+  --set persistence.storageClass=gke-ssd
+
+# Check deployment
+kubectl get pods -n sifnode -w
+```
+
+## Configuration Reference
+
+### GKE Terraform Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `project_id` | (required) | GCP project ID |
+| `region` | `us-central1` | GCP region |
+| `cluster_name` | (required) | GKE cluster name |
+| `machine_type` | `e2-standard-2` | Node machine type |
+| `desired_capacity` | `1` | Initial node count |
+| `min_capacity` | `1` | Minimum nodes (autoscaling) |
+| `max_capacity` | `5` | Maximum nodes (autoscaling) |
+| `disk_size` | `100` | Node disk size (GB) |
+| `storage_size` | `50Gi` | PVC storage size |
+| `enable_private_nodes` | `false` | Enable private GKE nodes |
+| `release_channel` | `REGULAR` | GKE release channel |
+
+### Helm Values (GKE-specific)
+
+Key settings in `values_gke.yaml`:
+
+```yaml
+provider: gke
+persistence:
+  storageClass: gke-ssd  # SSD persistent storage
+resources:
+  requests:
+    cpu: "500m"
+    memory: "1Gi"
+  limits:
+    cpu: "2"
+    memory: "4Gi"
+autoscaling:
+  enabled: true
+```
+
+## Comparison: AWS (EKS) vs GKE
+
+| Feature | AWS (EKS) | GKE |
+|---------|-----------|-----|
+| Provisioning | Terraform (terraform-aws-modules/eks) | Terraform (google_container_cluster) |
+| Node Type | t2.medium (x86) | e2-standard-2 (x86) |
+| Storage | EBS CSI / EFS CSI | GCE Persistent Disk (pd-ssd) |
+| Networking | VPC + Public Subnets | VPC + Subnet + Cloud NAT |
+| DNS | Route53 (via ExternalDNS) | Cloud DNS |
+| Ingress | ALB Ingress Controller | GCE Ingress |
+| IAM | AWS IAM Roles | GCP IAM + Workload Identity |
+| CLI Profile | AWS_PROFILE | gcloud config |
+
+## Troubleshooting
+
+### Pods stuck in Pending state
+```bash
+kubectl describe pod -n sifnode
+# Check for resource constraints or PVC binding issues
+```
+
+### GKE cluster not reachable
+```bash
+gcloud container clusters get-credentials sifnode-gke --region us-central1
+# Ensure your gcloud is authenticated
+```
+
+### Storage issues
+```bash
+kubectl get pvc -n sifnode
+kubectl get storageclass
+# Ensure gke-ssd storage class exists
+```
+
+## Clean Up
+
+```bash
+# Option 1: Using the deployment script
+./deploy/gke/deploy_gke.sh destroy
+
+# Option 2: Manual (cluster_name has no default, so it must be passed)
+helm uninstall sifnode --namespace sifnode
+cd deploy/terraform/providers/gke
+terraform destroy \
+  -var="project_id=YOUR_PROJECT_ID" \
+  -var="cluster_name=sifnode-gke"
+```
+
+---
+
+*For additional assistance, refer to the [sifnode documentation](https://docs.sifchain.finance/).*