From 56f3344c263deca3e1947e0eb0baf845ca788334 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Fri, 18 Apr 2025 16:27:08 +0300 Subject: [PATCH 01/11] Delete clusters trough openshift-installer --- Dockerfile.dev | 11 +++- cloudwash/constants.py | 1 + cloudwash/entities/resources/ocps.py | 87 ++++++++++++++++++++++------ cloudwash/utils.py | 68 +++++++++++++++++----- 4 files changed, 134 insertions(+), 33 deletions(-) diff --git a/Dockerfile.dev b/Dockerfile.dev index 84874a7f2..79d996acb 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -21,6 +21,13 @@ RUN git clone --depth=1 https://github.com/RedHatQE/cloudwash.git && \ RUN /bin/bash -c 'cd ${CLOUDWASH_DIR}; for conffile in conf/*.yaml.template; do cp -- "$conffile" "${conffile%.yaml.template}.yaml"; done' +# Install openshift-installer cli +RUN curl -kf https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-install-linux.tar.gz \ + -o openshift-install-linux.tar.gz && \ + tar zxf openshift-install-linux.tar.gz && \ + mv openshift-install ${CLOUDWASH_DIR} && \ + chmod +x /${CLOUDWASH_DIR}/openshift-install + # adding .profile to environment variables, so it will be kept between shell sessions RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate && touch ${APP_ROOT}/.profile @@ -31,5 +38,5 @@ RUN fix-permissions ${APP_ROOT} -P && \ USER 1001 WORKDIR "${CLOUDWASH_DIR}" -ENTRYPOINT ["swach"] -CMD ["--help"] +CMD ["swach"] +# CMD ["--help"] diff --git a/cloudwash/constants.py b/cloudwash/constants.py index 677f581f5..30330c822 100644 --- a/cloudwash/constants.py +++ b/cloudwash/constants.py @@ -3,3 +3,4 @@ gce_data = ['VMS', 'NICS', 'DISCS'] vmware_data = ['VMS', 'NICS', 'DISCS'] container_data = ['CONTAINERS'] +OCP_TAG_SUBSTR = "kubernetes.io/cluster/" diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index b47785199..b60170c6f 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -1,20 +1,37 @@ 
+import json +import os +import tempfile + from cloudwash.config import settings +from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.entities.resources.base import OCPsCleanup +from cloudwash.logger import logger from cloudwash.utils import calculate_time_threshold +from cloudwash.utils import check_installer_exists +from cloudwash.utils import destroy_ocp_cluster from cloudwash.utils import dry_data -from cloudwash.utils import filter_resources_by_time_modified +from cloudwash.utils import filtered_resources_by_time_modified from cloudwash.utils import group_ocps_by_cluster -from cloudwash.utils import OCP_TAG_SUBSTR class CleanOCPs(OCPsCleanup): def __init__(self, client): self.client = client - self._delete = [] + self._deletable = {"ocp_clusters": [], "filtered_leftovers": []} + self._cluster_map = {} self.list() def _set_dry(self): - dry_data['OCPS']['delete'] = self._delete + def _make_printable(resources: list): + return { + ocp.resource_type: [ + r.name for r in resources if r.resource_type == ocp.resource_type + ] + for ocp in resources + } + + dry_data['OCPS']['delete'] = _make_printable(self._deletable["filtered_leftovers"]) + dry_data['OCPS']['clusters'] = self._deletable["ocp_clusters"] def list(self): pass @@ -22,9 +39,38 @@ def list(self): def remove(self): pass + def prepare_cluster_metadata(self, cluster_name: str, region: str, cleanup_dir: str): + """ + TODO Complete + """ + # Prepare the data + logger.info(f"Preparing metadata for cluster: {cluster_name}") + cluster_metadata = { + "aws": { + "region": region, + "identifier": [{f"{OCP_TAG_SUBSTR}{cluster_name}": "owned"}], + } + } + metadata_file = os.path.join(cleanup_dir, "metadata.json") + + # Write the JSON to the file + with open(metadata_file, "w") as f: + json.dump(cluster_metadata, f) + + logger.debug(f"Metadata written to {metadata_file}") + return metadata_file + def cleanup(self): if not settings.dry_run: - self.remove() + check_installer_exists() + with 
tempfile.TemporaryDirectory() as tmpdir: + for cluster_name in self._deletable["ocp_clusters"]: + metadata_path = self.prepare_cluster_metadata( + cluster_name=cluster_name, + region=self.client.cleaning_region, + cleanup_dir=tmpdir, + ) + destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) class CleanAWSOcps(CleanOCPs): @@ -32,32 +78,37 @@ def list(self): resources = [] time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.get("SLA")) - ocp_prefix = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) - for prefix in ocp_prefix: + ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) + for prefix in ocp_prefixes: query = " ".join( [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.client.cleaning_region}"] ) resources.extend(self.client.list_resources(query=query)) - # Prepare resources to be filtered before deletion - cluster_map = group_ocps_by_cluster(resources=resources) - for cluster_name in cluster_map.keys(): - cluster_resources = cluster_map[cluster_name].get("Resources") - instances = cluster_map[cluster_name].get("Instances") + # Filter resources by SLA before deletion + self._cluster_map = group_ocps_by_cluster(resources=resources) + for cluster_name in self._cluster_map.keys(): + cluster_resources = self._cluster_map[cluster_name].get("Resources") + instances = self._cluster_map[cluster_name].get("Instances") if instances: # For resources with associated EC2 Instances, filter by Instances SLA - if not filter_resources_by_time_modified( + if not filtered_resources_by_time_modified( time_threshold, resources=instances, ): - self._delete.extend(cluster_resources) + self._deletable["filtered_leftovers"].extend(cluster_resources) + self._deletable["ocp_clusters"].append(cluster_name) else: # For resources with no associated EC2 Instances, identify as leftovers - self._delete.extend( - filter_resources_by_time_modified(time_threshold, resources=cluster_resources) + 
self._deletable["filtered_leftovers"].extend( + filtered_resources_by_time_modified(time_threshold, resources=cluster_resources) ) + self._deletable["ocp_clusters"].append(cluster_name) - # Sort resources by type - self._delete = sorted(self._delete, key=lambda x: x.resource_type) + # Sort resources by type and cluster by name + self._deletable["filtered_leftovers"] = sorted( + self._deletable["filtered_leftovers"], key=lambda x: x.resource_type + ) + self._deletable["ocp_clusters"] = sorted(self._deletable["ocp_clusters"]) self._set_dry() diff --git a/cloudwash/utils.py b/cloudwash/utils.py index 60e3256d5..4e480de39 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -1,5 +1,7 @@ """Common utils for cleanup activities of all CRs""" import importlib.resources +import os +import subprocess from collections import namedtuple from datetime import datetime @@ -18,10 +20,9 @@ from wrapanapi.systems.ec2 import ResourceExplorerResource from cloudwash.assets import css +from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.logger import logger -OCP_TAG_SUBSTR = "kubernetes.io/cluster/" - _vms_dict = {"VMS": {"delete": [], "stop": [], "skip": []}} _containers_dict = {"CONTAINERS": {"delete": [], "stop": [], "skip": []}} @@ -29,7 +30,7 @@ "NICS": {"delete": []}, "DISCS": {"delete": []}, "PIPS": {"delete": []}, - "OCPS": {"delete": []}, + "OCPS": {"delete": [], "clusters": []}, "RESOURCES": {"delete": []}, "STACKS": {"delete": []}, "IMAGES": {"delete": []}, @@ -62,12 +63,8 @@ def resourcewise_data(dry_data=None) -> dict: "deletable_pips": dry_data["PIPS"]["delete"] if "PIPS" in dry_data else None, "deletable_resources": dry_data["RESOURCES"]["delete"], "deletable_stacks": dry_data["STACKS"]["delete"] if "STACKS" in dry_data else None, - "deletable_ocps": { - ocp.resource_type: [ - r.name for r in dry_data["OCPS"]["delete"] if r.resource_type == ocp.resource_type - ] - for ocp in dry_data["OCPS"]["delete"] - }, + "clusters_ocps": dry_data["OCPS"]["clusters"], 
+ "deletable_ocps": dry_data["OCPS"]["delete"], } return resource_data @@ -280,7 +277,7 @@ def calculate_time_threshold(time_ref=""): return time_threshold -def filter_resources_by_time_modified( +def filtered_resources_by_time_modified( time_threshold, resources: list[ResourceExplorerResource] = None, ) -> list: @@ -303,6 +300,51 @@ def filter_resources_by_time_modified( return filtered_resources -def delete_ocp(ocp): - # WIP: add support for deletion - pass +def check_installer_exists(): + try: + subprocess.run( + ['openshift-install', '--help'], + stdout=subprocess.DEVNULL, # Suppress stdout + stderr=subprocess.DEVNULL, # Suppress stderr + check=False, # Ignore failure + ) + logger.info("Openshift Installer exists") + except FileNotFoundError: + logger.exception( + "Openshift Installer CLI doesn't exists" + "\nUse a docker container env for cleanup or locally install using: " + "https://mirror.openshift.com/pub/openshift-v4/x86_64/" + "clients/ocp/stable/openshift-install-linux.tar." + "\nFor more information check out: https://github.com/openshift/installer." 
+ ) + exit(1) + + +def destroy_ocp_cluster(metadata_path: str, cluster_name: str): + if metadata_path == "" or not os.path.exists(metadata_path): + # Return without raising exception, will try to fetch next OCP cluster info + logger.error(f"Failed to load cluster info from metadata path: {metadata_path}.") + else: + cleanup_dir = metadata_path.split("metadata.json")[0] + env = os.environ.copy() + # if not env.get("AWS_ACCESS_KEY_ID"): + # my_env["AWS_ACCESS_KEY_ID"] = settings.providers.ec2.username + # my_env["AWS_SECRET_ACCESS_KEY"] = settings.providers.ec2.password + try: + subprocess.run( + [ + 'openshift-install', + 'destroy', + 'cluster', + '--dir', + cleanup_dir, + '--log-level=debug', + ], + env=env, + stdout=subprocess.PIPE, + text=True, + check=False, + ) # Use check=True to raise an exception for non-zero return codes + logger.info(f"Successfully destroyed OCP cluster {cluster_name}") + except Exception as ex: + logger.error(f"Failed to cleanup OCP cluster {cluster_name}. Failure info:\n{ex}") From 01017955a58389bb4545cd8f25d7bd097dc46148 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Thu, 24 Apr 2025 15:20:10 +0300 Subject: [PATCH 02/11] Use strict filtering for all associated resources --- cloudwash/entities/resources/ocps.py | 2 ++ cloudwash/utils.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index b60170c6f..1cc23ae60 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -92,6 +92,8 @@ def list(self): instances = self._cluster_map[cluster_name].get("Instances") if instances: + import ipdb + ipdb.set_trace() # For resources with associated EC2 Instances, filter by Instances SLA if not filtered_resources_by_time_modified( time_threshold, diff --git a/cloudwash/utils.py b/cloudwash/utils.py index 4e480de39..c360561be 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -290,6 +290,21 @@ def 
filtered_resources_by_time_modified( :Example: Use the time_ref "1h" to collect resources that exist for more than an hour """ + # def has_too_new_resources(resources): + # return any(r.date_modified > time_threshold for r in resources) + # + # def filter(resources): + # return [r for r in resources if r.date_modified <= time_threshold] + # + # # Usage + # if first_use_case: + # if has_too_new_resources(resources): + # return False + # elif second_use_case: + # filtered = filter(resources) + + + filtered_resources = [] for resource in resources: From 322f02636321ff12c017dd7f10d03fc088989c24 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Sun, 27 Apr 2025 19:09:32 +0300 Subject: [PATCH 03/11] Filter cluster resources as group --- cloudwash/entities/resources/ocps.py | 21 +++++++++-------- cloudwash/utils.py | 35 ++++++---------------------- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index 1cc23ae60..38e8945e5 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -10,7 +10,7 @@ from cloudwash.utils import check_installer_exists from cloudwash.utils import destroy_ocp_cluster from cloudwash.utils import dry_data -from cloudwash.utils import filtered_resources_by_time_modified +from cloudwash.utils import filter_resources_by_time_modified from cloudwash.utils import group_ocps_by_cluster @@ -76,7 +76,7 @@ def cleanup(self): class CleanAWSOcps(CleanOCPs): def list(self): resources = [] - time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.get("SLA")) + time_threshold = settings.aws.criteria.ocps.get("SLA") ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) for prefix in ocp_prefixes: @@ -90,22 +90,23 @@ def list(self): for cluster_name in self._cluster_map.keys(): cluster_resources = self._cluster_map[cluster_name].get("Resources") instances = 
self._cluster_map[cluster_name].get("Instances") + leftover_ocp = False if instances: - import ipdb - ipdb.set_trace() # For resources with associated EC2 Instances, filter by Instances SLA - if not filtered_resources_by_time_modified( + if filter_resources_by_time_modified( time_threshold, resources=instances, ): - self._deletable["filtered_leftovers"].extend(cluster_resources) - self._deletable["ocp_clusters"].append(cluster_name) + leftover_ocp = True + self._deletable["filtered_leftovers"].extend(instances) else: # For resources with no associated EC2 Instances, identify as leftovers - self._deletable["filtered_leftovers"].extend( - filtered_resources_by_time_modified(time_threshold, resources=cluster_resources) - ) + leftover_ocp = True + + if leftover_ocp: + # Will not collect resources recorded during the SLA time + self._deletable["filtered_leftovers"].extend(cluster_resources) self._deletable["ocp_clusters"].append(cluster_name) # Sort resources by type and cluster by name diff --git a/cloudwash/utils.py b/cloudwash/utils.py index c360561be..060d0ae7b 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -277,42 +277,21 @@ def calculate_time_threshold(time_ref=""): return time_threshold -def filtered_resources_by_time_modified( +def filter_resources_by_time_modified( time_threshold, resources: list[ResourceExplorerResource] = None, -) -> list: +) -> bool: """ Filter list of AWS resources by checking modification date ("LastReportedAt") - :param datetime time_threshold: Time filtering criteria + :param str time_threshold: Time filtering criteria :param list resources: List of resources to be filtered out - :return: list of resources that last modified before time threshold + :return: True if all resources in the list last modified before time threshold :Example: Use the time_ref "1h" to collect resources that exist for more than an hour """ - # def has_too_new_resources(resources): - # return any(r.date_modified > time_threshold for r in resources) - 
# - # def filter(resources): - # return [r for r in resources if r.date_modified <= time_threshold] - # - # # Usage - # if first_use_case: - # if has_too_new_resources(resources): - # return False - # elif second_use_case: - # filtered = filter(resources) - - - - filtered_resources = [] - - for resource in resources: - # Will not collect resources recorded during the SLA time - if resource.date_modified > time_threshold: - continue - filtered_resources.append(resource) - return filtered_resources + time_threshold = calculate_time_threshold(time_ref=time_threshold) + return all(r.date_modified <= time_threshold for r in resources) def check_installer_exists(): @@ -323,7 +302,7 @@ def check_installer_exists(): stderr=subprocess.DEVNULL, # Suppress stderr check=False, # Ignore failure ) - logger.info("Openshift Installer exists") + logger.info("Found Openshift Installer") except FileNotFoundError: logger.exception( "Openshift Installer CLI doesn't exists" From cd55d5ff86c0e04fd41567758be4d39c544ea20f Mon Sep 17 00:00:00 2001 From: oharan2 Date: Sun, 27 Apr 2025 21:28:44 +0300 Subject: [PATCH 04/11] Print openshift-installer cli output only during failures --- cloudwash/utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cloudwash/utils.py b/cloudwash/utils.py index 060d0ae7b..c133424b1 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -319,13 +319,14 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): # Return without raising exception, will try to fetch next OCP cluster info logger.error(f"Failed to load cluster info from metadata path: {metadata_path}.") else: + err_msg = f"Failed to cleanup OCP cluster {cluster_name}. 
Failure info:" cleanup_dir = metadata_path.split("metadata.json")[0] env = os.environ.copy() # if not env.get("AWS_ACCESS_KEY_ID"): # my_env["AWS_ACCESS_KEY_ID"] = settings.providers.ec2.username # my_env["AWS_SECRET_ACCESS_KEY"] = settings.providers.ec2.password try: - subprocess.run( + result = subprocess.run( [ 'openshift-install', 'destroy', @@ -336,9 +337,17 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): ], env=env, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Merge stderr into stdout text=True, check=False, ) # Use check=True to raise an exception for non-zero return codes - logger.info(f"Successfully destroyed OCP cluster {cluster_name}") + if result.returncode != 0: + # Errors (stderr) will be logged into the screen normally + # In addition, catch info, debug and other stdout logs from the openshift-installer cli + logger.error(f"{err_msg}\n{result.stdout}") + exit() + else: + logger.info(f"Successfully destroyed OCP cluster {cluster_name}") except Exception as ex: - logger.error(f"Failed to cleanup OCP cluster {cluster_name}. 
Failure info:\n{ex}") + # Catch output of the subprocess run error + logger.error(f"{err_msg}\n{ex}") From 2c599334ca356d6ed6c0f331b79c5fdef117ad56 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Sun, 27 Apr 2025 21:32:23 +0300 Subject: [PATCH 05/11] Update note --- cloudwash/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cloudwash/utils.py b/cloudwash/utils.py index c133424b1..a8cb282e3 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -342,10 +342,8 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): check=False, ) # Use check=True to raise an exception for non-zero return codes if result.returncode != 0: - # Errors (stderr) will be logged into the screen normally - # In addition, catch info, debug and other stdout logs from the openshift-installer cli + # Print logs from the openshift-installer cli logger.error(f"{err_msg}\n{result.stdout}") - exit() else: logger.info(f"Successfully destroyed OCP cluster {cluster_name}") except Exception as ex: From b2bd7e66fcddc6321eb53f0e20db8eae3e6f172c Mon Sep 17 00:00:00 2001 From: oharan2 Date: Sun, 27 Apr 2025 22:15:56 +0300 Subject: [PATCH 06/11] Add user confirmation and -y --yes flags to the cli --- cloudwash/cli.py | 4 +++- cloudwash/entities/resources/ocps.py | 15 ++++++++++----- cloudwash/providers/aws.py | 3 ++- cloudwash/utils.py | 28 +++++++++++++++++++++++++++- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/cloudwash/cli.py b/cloudwash/cli.py index 4713d5299..0d48528e9 100644 --- a/cloudwash/cli.py +++ b/cloudwash/cli.py @@ -105,8 +105,9 @@ def azure(ctx, vms, discs, nics, images, pips, _all, _all_rg): is_flag=True, help="Remove only unused OCP Cluster occupied resources from the provider", ) +@click.option("-y", "--yes", is_flag=True, help="Answer yes to all prompts") @click.pass_context -def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all): +def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, yes, _all): # Validate Amazon 
Settings validate_provider(ctx.command.name) is_dry_run = ctx.parent.params["dry"] @@ -118,6 +119,7 @@ def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all): pips=pips, stacks=stacks, ocps=ocps, + yes=yes, _all=_all, dry_run=is_dry_run, ) diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index 38e8945e5..12cc5251b 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -6,9 +6,8 @@ from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.entities.resources.base import OCPsCleanup from cloudwash.logger import logger -from cloudwash.utils import calculate_time_threshold +from cloudwash.utils import destroy_ocp_cluster_wrapper from cloudwash.utils import check_installer_exists -from cloudwash.utils import destroy_ocp_cluster from cloudwash.utils import dry_data from cloudwash.utils import filter_resources_by_time_modified from cloudwash.utils import group_ocps_by_cluster @@ -44,7 +43,7 @@ def prepare_cluster_metadata(self, cluster_name: str, region: str, cleanup_dir: TODO Complete """ # Prepare the data - logger.info(f"Preparing metadata for cluster: {cluster_name}") + logger.info(f"\nPreparing metadata for cluster: {cluster_name}") cluster_metadata = { "aws": { "region": region, @@ -60,7 +59,7 @@ def prepare_cluster_metadata(self, cluster_name: str, region: str, cleanup_dir: logger.debug(f"Metadata written to {metadata_file}") return metadata_file - def cleanup(self): + def cleanup(self, user_validation=False): if not settings.dry_run: check_installer_exists() with tempfile.TemporaryDirectory() as tmpdir: @@ -70,7 +69,8 @@ def cleanup(self): region=self.client.cleaning_region, cleanup_dir=tmpdir, ) - destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) + destroy_ocp_cluster_wrapper(metadata_path=metadata_path, cluster_name=cluster_name, user_validation=user_validation) + class CleanAWSOcps(CleanOCPs): @@ -105,6 +105,11 @@ def list(self): leftover_ocp 
= True if leftover_ocp: + if not filter_resources_by_time_modified(time_threshold, resources=cluster_resources): + for r in cluster_resources: + print(r.date_modified) + import ipdb + ipdb.set_trace() # Will not collect resources recorded during the SLA time self._deletable["filtered_leftovers"].extend(cluster_resources) self._deletable["ocp_clusters"].append(cluster_name) diff --git a/cloudwash/providers/aws.py b/cloudwash/providers/aws.py index f9959827e..205df1563 100644 --- a/cloudwash/providers/aws.py +++ b/cloudwash/providers/aws.py @@ -13,6 +13,7 @@ def cleanup(**kwargs): is_dry_run = kwargs.get("dry_run", False) + user_validate = kwargs.get("yes", False) dry_data['PROVIDER'] = "AWS" regions = settings.aws.auth.regions all_data = [] @@ -30,7 +31,7 @@ def cleanup(**kwargs): for items in data: dry_data[items]['delete'] = [] logger.info(f"\nResources from the region: {region}") - awscleanup.ocps.cleanup() + awscleanup.ocps.cleanup(user_validation=user_validate) if is_dry_run: echo_dry(dry_data) all_data.append(deepcopy(dry_data)) diff --git a/cloudwash/utils.py b/cloudwash/utils.py index a8cb282e3..d1a30f420 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -326,6 +326,7 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): # my_env["AWS_ACCESS_KEY_ID"] = settings.providers.ec2.username # my_env["AWS_SECRET_ACCESS_KEY"] = settings.providers.ec2.password try: + logger.info(f"Starting the destroy of OCP cluster: {cluster_name}") result = subprocess.run( [ 'openshift-install', @@ -345,7 +346,32 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): # Print logs from the openshift-installer cli logger.error(f"{err_msg}\n{result.stdout}") else: - logger.info(f"Successfully destroyed OCP cluster {cluster_name}") + logger.info(f"Successfully completed.\n") except Exception as ex: # Catch output of the subprocess run error logger.error(f"{err_msg}\n{ex}") + + +def validate_deletion_with_user_input(cluster_name) -> bool: + while True: 
+ # confirm with the user + user_input = input(f'Confirm destroy of cluster {cluster_name} [Y/N]: ') + + # input validation + if user_input.lower() in ('y', 'yes'): + return True + elif user_input.lower() in ('n', 'no'): # using this elif for readability + return False + else: + # ... error handling ... + print(f'Error: Input {user_input} unrecognised. Please try again.') + + +def destroy_ocp_cluster_wrapper(metadata_path: str, cluster_name: str, user_validation=False): + if user_validation: + destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) + else: + if validate_deletion_with_user_input(cluster_name): + destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) + else: + logger.info(f"Skipping the deletion of the cluster: {cluster_name}\n") \ No newline at end of file From e802971927bbfc15d91cfeeb0835dde121d2cdfd Mon Sep 17 00:00:00 2001 From: oharan2 Date: Tue, 6 May 2025 13:19:13 +0300 Subject: [PATCH 07/11] Make filtering more strict --- cloudwash/entities/resources/ocps.py | 33 ++++++++++++++++++---------- cloudwash/utils.py | 15 +++++++++---- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index 12cc5251b..664633980 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -6,8 +6,8 @@ from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.entities.resources.base import OCPsCleanup from cloudwash.logger import logger -from cloudwash.utils import destroy_ocp_cluster_wrapper from cloudwash.utils import check_installer_exists +from cloudwash.utils import destroy_ocp_cluster_wrapper from cloudwash.utils import dry_data from cloudwash.utils import filter_resources_by_time_modified from cloudwash.utils import group_ocps_by_cluster @@ -42,13 +42,17 @@ def prepare_cluster_metadata(self, cluster_name: str, region: str, cleanup_dir: """ TODO Complete """ + cluster_id = "" # Prepare the data 
logger.info(f"\nPreparing metadata for cluster: {cluster_name}") cluster_metadata = { + "clusterName": f"{cluster_name}", + "clusterID": f"{cluster_id}", + "infraID": f"{cluster_name}", "aws": { "region": region, "identifier": [{f"{OCP_TAG_SUBSTR}{cluster_name}": "owned"}], - } + }, } metadata_file = os.path.join(cleanup_dir, "metadata.json") @@ -69,8 +73,11 @@ def cleanup(self, user_validation=False): region=self.client.cleaning_region, cleanup_dir=tmpdir, ) - destroy_ocp_cluster_wrapper(metadata_path=metadata_path, cluster_name=cluster_name, user_validation=user_validation) - + destroy_ocp_cluster_wrapper( + metadata_path=metadata_path, + cluster_name=cluster_name, + user_validation=user_validation, + ) class CleanAWSOcps(CleanOCPs): @@ -105,14 +112,16 @@ def list(self): leftover_ocp = True if leftover_ocp: - if not filter_resources_by_time_modified(time_threshold, resources=cluster_resources): - for r in cluster_resources: - print(r.date_modified) - import ipdb - ipdb.set_trace() - # Will not collect resources recorded during the SLA time - self._deletable["filtered_leftovers"].extend(cluster_resources) - self._deletable["ocp_clusters"].append(cluster_name) + # Filter all cluster resources by SLA to avoid deletion of resources that are + # in use, for example: EBS volume is being reused by the newly provisioned cluster + if filter_resources_by_time_modified(time_threshold, resources=cluster_resources): + # Will not collect resources recorded during the SLA time + self._deletable["filtered_leftovers"].extend(cluster_resources) + self._deletable["ocp_clusters"].append(cluster_name) + else: + logger.info( + f"Found resources in use, skipping the deletion of cluster {cluster_name}" + ) # Sort resources by type and cluster by name self._deletable["filtered_leftovers"] = sorted( diff --git a/cloudwash/utils.py b/cloudwash/utils.py index d1a30f420..7c9277164 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -291,7 +291,13 @@ def 
filter_resources_by_time_modified( Use the time_ref "1h" to collect resources that exist for more than an hour """ time_threshold = calculate_time_threshold(time_ref=time_threshold) - return all(r.date_modified <= time_threshold for r in resources) + if all(r.date_modified <= time_threshold for r in resources): + return True + else: + for r in resources: + if r.date_modified > time_threshold: + logger.debug(f"Id: {r.id}, Modified: {r.date_modified}, Type: {r.resource_type}") + return False def check_installer_exists(): @@ -326,7 +332,7 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): # my_env["AWS_ACCESS_KEY_ID"] = settings.providers.ec2.username # my_env["AWS_SECRET_ACCESS_KEY"] = settings.providers.ec2.password try: - logger.info(f"Starting the destroy of OCP cluster: {cluster_name}") + logger.info(f"Starting to destroy OCP cluster: {cluster_name}") result = subprocess.run( [ 'openshift-install', @@ -346,7 +352,8 @@ def destroy_ocp_cluster(metadata_path: str, cluster_name: str): # Print logs from the openshift-installer cli logger.error(f"{err_msg}\n{result.stdout}") else: - logger.info(f"Successfully completed.\n") + logger.debug(result.stdout) + logger.info("Successfully completed.") except Exception as ex: # Catch output of the subprocess run error logger.error(f"{err_msg}\n{ex}") @@ -374,4 +381,4 @@ def destroy_ocp_cluster_wrapper(metadata_path: str, cluster_name: str, user_vali if validate_deletion_with_user_input(cluster_name): destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) else: - logger.info(f"Skipping the deletion of the cluster: {cluster_name}\n") \ No newline at end of file + logger.info(f"Skipping the deletion of the cluster: {cluster_name}\n") From a69e10013a0576bf01da34c7f856b2eed9f78581 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Thu, 8 May 2025 12:10:23 +0300 Subject: [PATCH 08/11] Set clusters as LeftoverAWSOcp object. 
Update metadata --- cloudwash/constants.py | 11 ++ cloudwash/entities/resources/ocps.py | 185 +++++++++++++++++++-------- cloudwash/utils.py | 35 +++-- 3 files changed, 164 insertions(+), 67 deletions(-) diff --git a/cloudwash/constants.py b/cloudwash/constants.py index 30330c822..46cba6198 100644 --- a/cloudwash/constants.py +++ b/cloudwash/constants.py @@ -3,4 +3,15 @@ gce_data = ['VMS', 'NICS', 'DISCS'] vmware_data = ['VMS', 'NICS', 'DISCS'] container_data = ['CONTAINERS'] + +# OCP resources tags for filtering OCP_TAG_SUBSTR = "kubernetes.io/cluster/" +CLUSTER_NAME_TAGS = [ + "clusterName", + "api.openshift.com/name", +] +CLUSTER_ID_TAGS = [ + "openshiftClusterID", + "api.openshift.com/id", +] +CLUSTER_EXP_DATE_TAG = "expirationDate" diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index 664633980..863f883cc 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -1,8 +1,9 @@ -import json -import os import tempfile from cloudwash.config import settings +from cloudwash.constants import CLUSTER_EXP_DATE_TAG +from cloudwash.constants import CLUSTER_ID_TAGS +from cloudwash.constants import CLUSTER_NAME_TAGS from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.entities.resources.base import OCPsCleanup from cloudwash.logger import logger @@ -10,12 +11,76 @@ from cloudwash.utils import destroy_ocp_cluster_wrapper from cloudwash.utils import dry_data from cloudwash.utils import filter_resources_by_time_modified -from cloudwash.utils import group_ocps_by_cluster +from cloudwash.utils import write_metadata_file + + +class LeftoverAWSOcp: + def __init__(self, infra_id: str, region: str): + self.infra_id = infra_id + self.region = region + self.associated_resources = {"Resources": [], "Instances": []} + self._cluster_name = "" # Extract using resources tags + self._cluster_id = "" # Extract using resources tags + self._expiration_date = "" # Extract using resources tags + + def 
__repr__(self): + return ( + f'{self.infra_id}, Region: {self.region}, Instances: ' + f'{len(self.associated_resources.get("Instances"))}, other resources: ' + f'{len(self.associated_resources.get("Resources"))})' + ) + + def get_cluster_info( + self, + ): + for resources_types in self.associated_resources.values(): + for resource in resources_types: + if all([self._cluster_id, self._cluster_name, self._expiration_date]): + break + if not self._expiration_date: + exp_date = resource.get_tag_value(key=CLUSTER_EXP_DATE_TAG) + if exp_date: + self._expiration_date = exp_date + for name in CLUSTER_NAME_TAGS: + if not self._cluster_name: + name_tag = resource.get_tag_value(key=name) + if name_tag: + self._cluster_name = name_tag + for id in CLUSTER_ID_TAGS: + if not self._cluster_id: + id_tag = resource.get_tag_value(key=id) + if id_tag: + self._cluster_id = id_tag + + def get_cluster_metadata( + self, + ): + """ + TODO Complete + """ + # Prepare the data + infraID = self.infra_id + clusterName = self._cluster_name or infraID + clusterID = self._cluster_id or infraID + + logger.info(f"\nPreparing metadata for cluster: {infraID}") + + cluster_metadata = { + "clusterName": f"{clusterName}", + "clusterID": f"{clusterID}", + "infraID": f"{infraID}", + "aws": { + "region": self.region, + "identifier": [{f"{OCP_TAG_SUBSTR}{infraID}": "owned"}], + }, + } + return cluster_metadata class CleanOCPs(OCPsCleanup): def __init__(self, client): self.client = client + self.cleaning_region = self.client.cleaning_region self._deletable = {"ocp_clusters": [], "filtered_leftovers": []} self._cluster_map = {} self.list() @@ -38,40 +103,16 @@ def list(self): def remove(self): pass - def prepare_cluster_metadata(self, cluster_name: str, region: str, cleanup_dir: str): - """ - TODO Complete - """ - cluster_id = "" - # Prepare the data - logger.info(f"\nPreparing metadata for cluster: {cluster_name}") - cluster_metadata = { - "clusterName": f"{cluster_name}", - "clusterID": f"{cluster_id}", - 
"infraID": f"{cluster_name}", - "aws": { - "region": region, - "identifier": [{f"{OCP_TAG_SUBSTR}{cluster_name}": "owned"}], - }, - } - metadata_file = os.path.join(cleanup_dir, "metadata.json") - - # Write the JSON to the file - with open(metadata_file, "w") as f: - json.dump(cluster_metadata, f) - - logger.debug(f"Metadata written to {metadata_file}") - return metadata_file - def cleanup(self, user_validation=False): if not settings.dry_run: check_installer_exists() with tempfile.TemporaryDirectory() as tmpdir: for cluster_name in self._deletable["ocp_clusters"]: - metadata_path = self.prepare_cluster_metadata( - cluster_name=cluster_name, - region=self.client.cleaning_region, - cleanup_dir=tmpdir, + cluster = self._cluster_map[cluster_name] + cluster.get_cluster_info() + cluster.metadata = cluster.get_cluster_metadata() + metadata_path = write_metadata_file( + cluster_metadata=cluster.metadata, cleanup_dir=tmpdir ) destroy_ocp_cluster_wrapper( metadata_path=metadata_path, @@ -81,22 +122,44 @@ def cleanup(self, user_validation=False): class CleanAWSOcps(CleanOCPs): - def list(self): - resources = [] - time_threshold = settings.aws.criteria.ocps.get("SLA") - - ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) - for prefix in ocp_prefixes: - query = " ".join( - [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.client.cleaning_region}"] - ) - resources.extend(self.client.list_resources(query=query)) + def group_ocps_by_cluster(self, resources: list = None) -> dict: + """Group different types of AWS resources under their original OCP clusters - # Filter resources by SLA before deletion - self._cluster_map = group_ocps_by_cluster(resources=resources) - for cluster_name in self._cluster_map.keys(): - cluster_resources = self._cluster_map[cluster_name].get("Resources") - instances = self._cluster_map[cluster_name].get("Instances") + :param list resources: AWS resources collected by defined region and sla + :return: A dictionary with the 
clusters as keys and the associated resources as values + """ + if resources is None: + resources = [] + clusters_map = {} + + for resource in resources: + for key in resource.get_tags(regex=OCP_TAG_SUBSTR): + cluster_infra_id = key.get("Key") + if OCP_TAG_SUBSTR in cluster_infra_id: + # Considering the following format: "kubernetes.io/cluster/" + cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1] + if cluster_infra_id not in clusters_map.keys(): + clusters_map[cluster_infra_id] = LeftoverAWSOcp( + infra_id=cluster_infra_id, region=self.cleaning_region + ) + + # Set cluster's EC2 instances + if hasattr(resource, 'ec2_instance'): + clusters_map[cluster_infra_id].associated_resources["Instances"].append( + resource + ) + # Set resource under cluster + else: + clusters_map[cluster_infra_id].associated_resources["Resources"].append( + resource + ) + return clusters_map + + def _filter_deletable(self): + time_threshold = settings.aws.criteria.ocps.get("SLA") + for cluster in self._cluster_map.keys(): + resources = self._cluster_map[cluster].associated_resources.get("Resources") + instances = self._cluster_map[cluster].associated_resources.get("Instances") leftover_ocp = False if instances: @@ -106,23 +169,39 @@ def list(self): resources=instances, ): leftover_ocp = True + # If cluster is not selected due to other resources being used, + # the instances will only be printed in dry run self._deletable["filtered_leftovers"].extend(instances) else: - # For resources with no associated EC2 Instances, identify as leftovers + # For resources with no associated EC2 Instances, consider as leftovers leftover_ocp = True if leftover_ocp: # Filter all cluster resources by SLA to avoid deletion of resources that are - # in use, for example: EBS volume is being reused by the newly provisioned cluster - if filter_resources_by_time_modified(time_threshold, resources=cluster_resources): + # in use, like EBS volume or security groups + if 
filter_resources_by_time_modified(time_threshold, resources=resources): # Will not collect resources recorded during the SLA time - self._deletable["filtered_leftovers"].extend(cluster_resources) - self._deletable["ocp_clusters"].append(cluster_name) + self._deletable["ocp_clusters"].append(cluster) + self._deletable["filtered_leftovers"].extend(resources) else: logger.info( - f"Found resources in use, skipping the deletion of cluster {cluster_name}" + f"Found resources in use, skipping the deletion of cluster {cluster}" ) + def list(self): + resources = [] + + ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) + for prefix in ocp_prefixes: + query = " ".join( + [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.cleaning_region}"] + ) + resources.extend(self.client.list_resources(query=query)) + + # Filter resources by SLA before deletion + self._cluster_map = self.group_ocps_by_cluster(resources=resources) + self._filter_deletable() + # Sort resources by type and cluster by name self._deletable["filtered_leftovers"] = sorted( self._deletable["filtered_leftovers"], key=lambda x: x.resource_type diff --git a/cloudwash/utils.py b/cloudwash/utils.py index 7c9277164..e1cd4d7fa 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -1,5 +1,6 @@ """Common utils for cleanup activities of all CRs""" import importlib.resources +import json import os import subprocess from collections import namedtuple @@ -240,18 +241,19 @@ def group_ocps_by_cluster(resources: list = None) -> dict: for resource in resources: for key in resource.get_tags(regex=OCP_TAG_SUBSTR): - cluster_name = key.get("Key") - if OCP_TAG_SUBSTR in cluster_name: - cluster_name = cluster_name.split(OCP_TAG_SUBSTR)[1] - if cluster_name not in clusters_map.keys(): - clusters_map[cluster_name] = {"Resources": [], "Instances": []} + cluster_infra_id = key.get("Key") + if OCP_TAG_SUBSTR in cluster_infra_id: + # Considering the following format: "kubernetes.io/cluster/" + 
cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1] + if cluster_infra_id not in clusters_map.keys(): + clusters_map[cluster_infra_id] = {"Resources": [], "Instances": []} # Set cluster's EC2 instances if hasattr(resource, 'ec2_instance'): - clusters_map[cluster_name]["Instances"].append(resource) + clusters_map[cluster_infra_id]["Instances"].append(resource) # Set resource under cluster else: - clusters_map[cluster_name]["Resources"].append(resource) + clusters_map[cluster_infra_id]["Resources"].append(resource) return clusters_map @@ -291,13 +293,7 @@ def filter_resources_by_time_modified( Use the time_ref "1h" to collect resources that exist for more than an hour """ time_threshold = calculate_time_threshold(time_ref=time_threshold) - if all(r.date_modified <= time_threshold for r in resources): - return True - else: - for r in resources: - if r.date_modified > time_threshold: - logger.debug(f"Id: {r.id}, Modified: {r.date_modified}, Type: {r.resource_type}") - return False + return all(r.date_modified <= time_threshold for r in resources) def check_installer_exists(): @@ -382,3 +378,14 @@ def destroy_ocp_cluster_wrapper(metadata_path: str, cluster_name: str, user_vali destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) else: logger.info(f"Skipping the deletion of the cluster: {cluster_name}\n") + + +def write_metadata_file(cluster_metadata: dict, cleanup_dir: str): + metadata_file = os.path.join(cleanup_dir, "metadata.json") + + # Write the JSON to the file + with open(metadata_file, "w") as f: + json.dump(cluster_metadata, f) + + logger.debug(f"Metadata written to {metadata_file}") + return metadata_file From 24dccae759a97e8c63b71867d5f65331f6103b19 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Fri, 9 May 2025 14:51:00 +0300 Subject: [PATCH 09/11] Move AWS properties to CleanAWSOcps constructor --- cloudwash/entities/resources/ocps.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index 863f883cc..ebfa08d16 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -57,6 +57,7 @@ def get_cluster_metadata( ): """ TODO Complete + TODO Check if we can extract HostedZoneRole, clusterDomain """ # Prepare the data infraID = self.infra_id @@ -78,9 +79,7 @@ def get_cluster_metadata( class CleanOCPs(OCPsCleanup): - def __init__(self, client): - self.client = client - self.cleaning_region = self.client.cleaning_region + def __init__(self): self._deletable = {"ocp_clusters": [], "filtered_leftovers": []} self._cluster_map = {} self.list() @@ -122,6 +121,12 @@ def cleanup(self, user_validation=False): class CleanAWSOcps(CleanOCPs): + def __init__(self, client): + self.client = client + self.cleaning_region = self.client.cleaning_region + super().__init__() + + def group_ocps_by_cluster(self, resources: list = None) -> dict: """Group different types of AWS resources under their original OCP clusters From 2cffbe68784f5201e6e1e3716beab40dd91d8035 Mon Sep 17 00:00:00 2001 From: oharan2 Date: Fri, 9 May 2025 16:57:48 +0300 Subject: [PATCH 10/11] Move openshift-install under /usr/local/bin --- Dockerfile.dev | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile.dev b/Dockerfile.dev index 79d996acb..d4280389c 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -25,8 +25,7 @@ RUN /bin/bash -c 'cd ${CLOUDWASH_DIR}; for conffile in conf/*.yaml.template; do RUN curl -kf https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-install-linux.tar.gz \ -o openshift-install-linux.tar.gz && \ tar zxf openshift-install-linux.tar.gz && \ - mv openshift-install ${CLOUDWASH_DIR} && \ - chmod +x /${CLOUDWASH_DIR}/openshift-install + chmod +x openshift-install # adding .profile to environment variables, so it will be kept between shell sessions RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate 
&& touch ${APP_ROOT}/.profile @@ -35,6 +34,8 @@ RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate && touch ${AP USER 0 RUN fix-permissions ${APP_ROOT} -P && \ git config --global --add safe.directory ${CLOUDWASH_DIR} +RUN cp openshift-install /usr/local/bin/ + USER 1001 WORKDIR "${CLOUDWASH_DIR}" From 61607a67caf1062df3955499ccd1b0e30746682e Mon Sep 17 00:00:00 2001 From: oharan2 Date: Wed, 14 May 2025 13:04:13 +0300 Subject: [PATCH 11/11] Remove debug commands from Dockerfile --- Dockerfile.dev | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.dev b/Dockerfile.dev index d4280389c..f87191ce5 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -39,5 +39,5 @@ RUN cp openshift-install /usr/local/bin/ USER 1001 WORKDIR "${CLOUDWASH_DIR}" -CMD ["swach"] -# CMD ["--help"] +ENTRYPOINT ["swach"] +CMD ["--help"]