Delete OCP cluster leftovers using openshift-installer #166
Open: oharan2 wants to merge 11 commits into RedHatQE:master from oharan2:delete_installer
Commits (11, all by oharan2):

- 56f3344 Delete clusters trough openshift-installer
- 0101795 Use strict filtering for all associated resources
- 322f026 Filter cluster resources as group
- cd55d5f Print openshift-installer cli output only during failures
- 2c59933 Update note
- b2bd7e6 Add user confirmation and -y --yes flags to the cli
- e802971 Make filtering more strict
- a69e100 Set clusters as LeftoverAWSOcp object. Update metadata
- 24dccae Move AWS properties to CleanAWSOcps constructor
- 2cffbe6 Move openshift-install under /usr/local/bin
- 61607a6 Remove debug commands from Dockerfile
```diff
@@ -1,63 +1,215 @@
 import tempfile

 from cloudwash.config import settings
+from cloudwash.constants import CLUSTER_EXP_DATE_TAG
+from cloudwash.constants import CLUSTER_ID_TAGS
+from cloudwash.constants import CLUSTER_NAME_TAGS
+from cloudwash.constants import OCP_TAG_SUBSTR
 from cloudwash.entities.resources.base import OCPsCleanup
-from cloudwash.utils import calculate_time_threshold
 from cloudwash.logger import logger
+from cloudwash.utils import check_installer_exists
+from cloudwash.utils import destroy_ocp_cluster_wrapper
 from cloudwash.utils import dry_data
 from cloudwash.utils import filter_resources_by_time_modified
-from cloudwash.utils import group_ocps_by_cluster
-from cloudwash.utils import OCP_TAG_SUBSTR
+from cloudwash.utils import write_metadata_file


+class LeftoverAWSOcp:
+    def __init__(self, infra_id: str, region: str):
+        self.infra_id = infra_id
+        self.region = region
+        self.associated_resources = {"Resources": [], "Instances": []}
+        self._cluster_name = ""  # Extract using resources tags
+        self._cluster_id = ""  # Extract using resources tags
+        self._expiration_date = ""  # Extract using resources tags
+
+    def __repr__(self):
+        return (
+            f'{self.infra_id}, Region: {self.region}, Instances: '
+            f'{len(self.associated_resources.get("Instances"))}, other resources: '
+            f'{len(self.associated_resources.get("Resources"))})'
+        )
+
+    def get_cluster_info(self):
+        for resources_types in self.associated_resources.values():
+            for resource in resources_types:
+                if all([self._cluster_id, self._cluster_name, self._expiration_date]):
+                    break
+                if not self._expiration_date:
+                    exp_date = resource.get_tag_value(key=CLUSTER_EXP_DATE_TAG)
+                    if exp_date:
+                        self._expiration_date = exp_date
+                for name in CLUSTER_NAME_TAGS:
+                    if not self._cluster_name:
+                        name_tag = resource.get_tag_value(key=name)
+                        if name_tag:
+                            self._cluster_name = name_tag
+                for id in CLUSTER_ID_TAGS:
+                    if not self._cluster_id:
+                        id_tag = resource.get_tag_value(key=id)
+                        if id_tag:
+                            self._cluster_id = id_tag
+
+    def get_cluster_metadata(self):
+        """
+        TODO Complete
+        TODO Check if we can extract HostedZoneRole, clusterDomain
+        """
+        # Prepare the data
+        infraID = self.infra_id
+        clusterName = self._cluster_name or infraID
+        clusterID = self._cluster_id or infraID
+
+        logger.info(f"\nPreparing metadata for cluster: {infraID}")
+
+        cluster_metadata = {
+            "clusterName": f"{clusterName}",
+            "clusterID": f"{clusterID}",
+            "infraID": f"{infraID}",
+            "aws": {
+                "region": self.region,
+                "identifier": [{f"{OCP_TAG_SUBSTR}{infraID}": "owned"}],
+            },
+        }
+        return cluster_metadata
+
+
 class CleanOCPs(OCPsCleanup):
-    def __init__(self, client):
-        self.client = client
-        self._delete = []
+    def __init__(self):
+        self._deletable = {"ocp_clusters": [], "filtered_leftovers": []}
```
Collaborator: I think it's good to be consistent with var names, that is …
```diff
+        self._cluster_map = {}
         self.list()

     def _set_dry(self):
-        dry_data['OCPS']['delete'] = self._delete
+        def _make_printable(resources: list):
+            return {
+                ocp.resource_type: [
+                    r.name for r in resources if r.resource_type == ocp.resource_type
+                ]
+                for ocp in resources
+            }
+
+        dry_data['OCPS']['delete'] = _make_printable(self._deletable["filtered_leftovers"])
+        dry_data['OCPS']['clusters'] = self._deletable["ocp_clusters"]

     def list(self):
         pass

     def remove(self):
         pass

-    def cleanup(self):
+    def cleanup(self, user_validation=False):
         if not settings.dry_run:
-            self.remove()
+            check_installer_exists()
+            with tempfile.TemporaryDirectory() as tmpdir:
+                for cluster_name in self._deletable["ocp_clusters"]:
+                    cluster = self._cluster_map[cluster_name]
+                    cluster.get_cluster_info()
+                    cluster.metadata = cluster.get_cluster_metadata()
+                    metadata_path = write_metadata_file(
+                        cluster_metadata=cluster.metadata, cleanup_dir=tmpdir
+                    )
+                    destroy_ocp_cluster_wrapper(
+                        metadata_path=metadata_path,
+                        cluster_name=cluster_name,
+                        user_validation=user_validation,
+                    )


 class CleanAWSOcps(CleanOCPs):
-    def list(self):
-        resources = []
-        time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.get("SLA"))
+    def __init__(self, client):
+        self.client = client
+        self.cleaning_region = self.client.cleaning_region
+        super().__init__()
```
Collaborator: Doesn't seem to be required.
```diff
-        ocp_prefix = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""])
-        for prefix in ocp_prefix:
-            query = " ".join(
-                [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.client.cleaning_region}"]
-            )
-            resources.extend(self.client.list_resources(query=query))
-
-        # Prepare resources to be filtered before deletion
-        cluster_map = group_ocps_by_cluster(resources=resources)
-        for cluster_name in cluster_map.keys():
-            cluster_resources = cluster_map[cluster_name].get("Resources")
-            instances = cluster_map[cluster_name].get("Instances")
+    def group_ocps_by_cluster(self, resources: list = None) -> dict:
+        """Group different types of AWS resources under their original OCP clusters
+
+        :param list resources: AWS resources collected by defined region and SLA
+        :return: A dictionary with the clusters as keys and the associated resources as values
+        """
+        if resources is None:
+            resources = []
+        clusters_map = {}
+
+        for resource in resources:
+            for key in resource.get_tags(regex=OCP_TAG_SUBSTR):
+                cluster_infra_id = key.get("Key")
+                if OCP_TAG_SUBSTR in cluster_infra_id:
+                    # Considering the following format: "kubernetes.io/cluster/<CLUSTER_INFRA_ID>"
+                    cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1]
+                    if cluster_infra_id not in clusters_map.keys():
+                        clusters_map[cluster_infra_id] = LeftoverAWSOcp(
+                            infra_id=cluster_infra_id, region=self.cleaning_region
+                        )
+
+                    # Set cluster's EC2 instances
+                    if hasattr(resource, 'ec2_instance'):
+                        clusters_map[cluster_infra_id].associated_resources["Instances"].append(
+                            resource
+                        )
+                    # Set resource under cluster
+                    else:
+                        clusters_map[cluster_infra_id].associated_resources["Resources"].append(
+                            resource
+                        )
+        return clusters_map
+
+    def _filter_deletable(self):
+        time_threshold = settings.aws.criteria.ocps.get("SLA")
+        for cluster in self._cluster_map.keys():
+            resources = self._cluster_map[cluster].associated_resources.get("Resources")
+            instances = self._cluster_map[cluster].associated_resources.get("Instances")
+            leftover_ocp = False

             if instances:
                 # For resources with associated EC2 Instances, filter by Instances SLA
-                if not filter_resources_by_time_modified(
+                if filter_resources_by_time_modified(
                     time_threshold,
                     resources=instances,
                 ):
-                    self._delete.extend(cluster_resources)
+                    leftover_ocp = True
+                    # If cluster is not selected due to other resources being used,
+                    # the instances will only be printed in dry run
+                    self._deletable["filtered_leftovers"].extend(instances)
             else:
-                # For resources with no associated EC2 Instances, identify as leftovers
-                self._delete.extend(
-                    filter_resources_by_time_modified(time_threshold, resources=cluster_resources)
-                )
+                # For resources with no associated EC2 Instances, consider as leftovers
+                leftover_ocp = True
+
+            if leftover_ocp:
+                # Filter all cluster resources by SLA to avoid deletion of resources that are
+                # in use, like EBS volumes or security groups
+                if filter_resources_by_time_modified(time_threshold, resources=resources):
+                    # Will not collect resources recorded during the SLA time
+                    self._deletable["ocp_clusters"].append(cluster)
+                    self._deletable["filtered_leftovers"].extend(resources)
+                else:
+                    logger.info(
+                        f"Found resources in use, skipping the deletion of cluster {cluster}"
+                    )
+
+    def list(self):
+        resources = []
+
+        ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""])
+        for prefix in ocp_prefixes:
+            query = " ".join(
+                [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.cleaning_region}"]
+            )
+            resources.extend(self.client.list_resources(query=query))
+
+        # Filter resources by SLA before deletion
+        self._cluster_map = self.group_ocps_by_cluster(resources=resources)
+        self._filter_deletable()

-        # Sort resources by type
-        self._delete = sorted(self._delete, key=lambda x: x.resource_type)
+        # Sort resources by type and clusters by name
+        self._deletable["filtered_leftovers"] = sorted(
+            self._deletable["filtered_leftovers"], key=lambda x: x.resource_type
+        )
+        self._deletable["ocp_clusters"] = sorted(self._deletable["ocp_clusters"])
+        self._set_dry()
```
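The grouping in `group_ocps_by_cluster` keys clusters off the infra ID embedded in the `kubernetes.io/cluster/<CLUSTER_INFRA_ID>` tag. A standalone sketch of that idea, using plain dicts as simplified stand-ins for cloudwash's resource objects:

```python
OCP_TAG_SUBSTR = "kubernetes.io/cluster/"

def group_by_infra_id(resources: list) -> dict:
    """Bucket resources by the infra ID embedded in their cluster tag key."""
    clusters = {}
    for resource in resources:
        for tag_key in resource.get("tags", []):
            if OCP_TAG_SUBSTR in tag_key:
                # "kubernetes.io/cluster/<CLUSTER_INFRA_ID>" -> "<CLUSTER_INFRA_ID>"
                infra_id = tag_key.split(OCP_TAG_SUBSTR)[1]
                clusters.setdefault(infra_id, []).append(resource)
    return clusters

# Hypothetical leftover resources from two clusters
resources = [
    {"name": "i-0abc", "tags": ["kubernetes.io/cluster/prod-x7k9"]},
    {"name": "vol-1", "tags": ["kubernetes.io/cluster/prod-x7k9"]},
    {"name": "sg-2", "tags": ["kubernetes.io/cluster/dev-m3t1"]},
]
grouped = group_by_infra_id(resources)
# grouped maps "prod-x7k9" to two resources and "dev-m3t1" to one
```

The PR's version additionally splits each bucket into "Instances" and "Resources" so that SLA filtering can treat EC2 instances as the signal for whether the whole cluster is a leftover.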
If we don't have the choice to say no, or if we shouldn't provide the choice, let's remove the option here.
The alternative, `-d` (say no to all cluster deletions), would make the deletion mode redundant. In deletion mode, if you don't pass `-y` (yes to all prompts), you can go cluster by cluster and decide which ones to clean up. So you're in deletion mode, but you can safely exclude some of the clusters.
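The behavior described above can be sketched as a small helper; this is an illustrative stand-in, not the PR's actual CLI wiring, and the function and parameter names are assumptions:

```python
def confirm_destroy(cluster_name: str, assume_yes: bool = False, ask=input) -> bool:
    """Return True if the given cluster should be destroyed.

    With assume_yes (the -y/--yes flag), skip prompting entirely;
    otherwise ask the user once per cluster. `ask` is injectable
    so the prompt can be tested without real stdin.
    """
    if assume_yes:
        return True
    answer = ask(f"Destroy cluster {cluster_name}? [y/N] ")
    return answer.strip().lower() in ("y", "yes")
```

A cleanup loop would then call the destroy wrapper only for clusters where `confirm_destroy` returns True, which is what lets a user stay in deletion mode while excluding individual clusters.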
I think the mechanism Cloudwash provides for excluding resources is to put them in an exceptions list. That means the user has to pre-choose the clusters they don't want resources deleted from.
Can we move to exceptions mode? How would that impact your use case?