diff --git a/Dockerfile.dev b/Dockerfile.dev index 84874a7f..f87191ce 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -21,6 +21,12 @@ RUN git clone --depth=1 https://github.com/RedHatQE/cloudwash.git && \ RUN /bin/bash -c 'cd ${CLOUDWASH_DIR}; for conffile in conf/*.yaml.template; do cp -- "$conffile" "${conffile%.yaml.template}.yaml"; done' +# Install openshift-installer cli +RUN curl -kf https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-install-linux.tar.gz \ + -o openshift-install-linux.tar.gz && \ + tar zxf openshift-install-linux.tar.gz && \ + chmod +x openshift-install + # adding .profile to environment variables, so it will be kept between shell sessions RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate && touch ${APP_ROOT}/.profile @@ -28,6 +34,8 @@ RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate && touch ${AP USER 0 RUN fix-permissions ${APP_ROOT} -P && \ git config --global --add safe.directory ${CLOUDWASH_DIR} +RUN touch /usr/local/bin/ && cp openshift-install /usr/local/bin/ + USER 1001 WORKDIR "${CLOUDWASH_DIR}" diff --git a/cloudwash/cli.py b/cloudwash/cli.py index 4713d529..0d48528e 100644 --- a/cloudwash/cli.py +++ b/cloudwash/cli.py @@ -105,8 +105,9 @@ def azure(ctx, vms, discs, nics, images, pips, _all, _all_rg): is_flag=True, help="Remove only unused OCP Cluster occupied resources from the provider", ) +@click.option("-y", "--yes", is_flag=True, help="Answer yes to all prompts") @click.pass_context -def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all): +def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, yes, _all): # Validate Amazon Settings validate_provider(ctx.command.name) is_dry_run = ctx.parent.params["dry"] @@ -118,6 +119,7 @@ def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all): pips=pips, stacks=stacks, ocps=ocps, + yes=yes, _all=_all, dry_run=is_dry_run, ) diff --git a/cloudwash/constants.py b/cloudwash/constants.py index 677f581f..46cba619 100644 --- a/cloudwash/constants.py +++ b/cloudwash/constants.py @@ -3,3 +3,15 @@ gce_data = ['VMS', 'NICS', 'DISCS'] vmware_data = ['VMS', 'NICS', 'DISCS'] container_data = ['CONTAINERS'] + +# OCP resources tags for filtering +OCP_TAG_SUBSTR = "kubernetes.io/cluster/" +CLUSTER_NAME_TAGS = [ + "clusterName", + "api.openshift.com/name", +] +CLUSTER_ID_TAGS = [ + "openshiftClusterID", + "api.openshift.com/id", +] +CLUSTER_EXP_DATE_TAG = "expirationDate" diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py index b4778519..ebfa08d1 100644 --- a/cloudwash/entities/resources/ocps.py +++ b/cloudwash/entities/resources/ocps.py @@ -1,20 +1,100 @@ +import tempfile + from cloudwash.config import settings +from cloudwash.constants import CLUSTER_EXP_DATE_TAG +from cloudwash.constants import CLUSTER_ID_TAGS +from cloudwash.constants import CLUSTER_NAME_TAGS +from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.entities.resources.base import OCPsCleanup -from cloudwash.utils import calculate_time_threshold +from cloudwash.logger import logger +from cloudwash.utils import check_installer_exists +from cloudwash.utils import destroy_ocp_cluster_wrapper from cloudwash.utils import dry_data from cloudwash.utils import filter_resources_by_time_modified -from cloudwash.utils import group_ocps_by_cluster -from cloudwash.utils import OCP_TAG_SUBSTR +from cloudwash.utils import write_metadata_file + + +class LeftoverAWSOcp: + def __init__(self, infra_id: str, region: str): + self.infra_id = infra_id + 
self.region = region + self.associated_resources = {"Resources": [], "Instances": []} + self._cluster_name = "" # Extract using resources tags + self._cluster_id = "" # Extract using resources tags + self._expiration_date = "" # Extract using resources tags + + def __repr__(self): + return ( + f'{self.infra_id}, Region: {self.region}, Instances: ' + f'{len(self.associated_resources.get("Instances"))}, other resources: ' + f'{len(self.associated_resources.get("Resources"))})' + ) + + def get_cluster_info( + self, + ): + for resources_types in self.associated_resources.values(): + for resource in resources_types: + if all([self._cluster_id, self._cluster_name, self._expiration_date]): + break + if not self._expiration_date: + exp_date = resource.get_tag_value(key=CLUSTER_EXP_DATE_TAG) + if exp_date: + self._expiration_date = exp_date + for name in CLUSTER_NAME_TAGS: + if not self._cluster_name: + name_tag = resource.get_tag_value(key=name) + if name_tag: + self._cluster_name = name_tag + for id in CLUSTER_ID_TAGS: + if not self._cluster_id: + id_tag = resource.get_tag_value(key=id) + if id_tag: + self._cluster_id = id_tag + + def get_cluster_metadata( + self, + ): + """ + TODO Complete + TODO Check if we can extract HostedZoneRole, clusterDomain + """ + # Prepare the data + infraID = self.infra_id + clusterName = self._cluster_name or infraID + clusterID = self._cluster_id or infraID + + logger.info(f"\nPreparing metadata for cluster: {infraID}") + + cluster_metadata = { + "clusterName": f"{clusterName}", + "clusterID": f"{clusterID}", + "infraID": f"{infraID}", + "aws": { + "region": self.region, + "identifier": [{f"{OCP_TAG_SUBSTR}{infraID}": "owned"}], + }, + } + return cluster_metadata class CleanOCPs(OCPsCleanup): - def __init__(self, client): - self.client = client - self._delete = [] + def __init__(self): + self._deletable = {"ocp_clusters": [], "filtered_leftovers": []} + self._cluster_map = {} self.list() def _set_dry(self): - dry_data['OCPS']['delete'] = self._delete + def _make_printable(resources: list): + return { + ocp.resource_type: [ + r.name for r in resources if r.resource_type == ocp.resource_type + ] + for ocp in resources + } + + dry_data['OCPS']['delete'] = _make_printable(self._deletable["filtered_leftovers"]) + dry_data['OCPS']['clusters'] = self._deletable["ocp_clusters"] def list(self): pass @@ -22,42 +102,114 @@ def list(self): def remove(self): pass - def cleanup(self): + def cleanup(self, user_validation=False): if not settings.dry_run: - self.remove() + check_installer_exists() + with tempfile.TemporaryDirectory() as tmpdir: + for cluster_name in self._deletable["ocp_clusters"]: + cluster = self._cluster_map[cluster_name] + cluster.get_cluster_info() + cluster.metadata = cluster.get_cluster_metadata() + metadata_path = write_metadata_file( + cluster_metadata=cluster.metadata, cleanup_dir=tmpdir + ) + destroy_ocp_cluster_wrapper( + metadata_path=metadata_path, + cluster_name=cluster_name, + user_validation=user_validation, + ) class CleanAWSOcps(CleanOCPs): - def list(self): - resources = [] - time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.get("SLA")) + def __init__(self, client): + self.client = client + self.cleaning_region = self.client.cleaning_region + super().__init__() - ocp_prefix = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) - for prefix in ocp_prefix: - query = " ".join( - [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.client.cleaning_region}"] - ) - 
resources.extend(self.client.list_resources(query=query)) - # Prepare resources to be filtered before deletion - cluster_map = group_ocps_by_cluster(resources=resources) - for cluster_name in cluster_map.keys(): - cluster_resources = cluster_map[cluster_name].get("Resources") - instances = cluster_map[cluster_name].get("Instances") + def group_ocps_by_cluster(self, resources: list = None) -> dict: + """Group different types of AWS resources under their original OCP clusters + + :param list resources: AWS resources collected by defined region and sla + :return: A dictionary with the clusters as keys and the associated resources as values + """ + if resources is None: + resources = [] + clusters_map = {} + + for resource in resources: + for key in resource.get_tags(regex=OCP_TAG_SUBSTR): + cluster_infra_id = key.get("Key") + if OCP_TAG_SUBSTR in cluster_infra_id: + # Considering the following format: "kubernetes.io/cluster/" + cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1] + if cluster_infra_id not in clusters_map.keys(): + clusters_map[cluster_infra_id] = LeftoverAWSOcp( + infra_id=cluster_infra_id, region=self.cleaning_region + ) + + # Set cluster's EC2 instances + if hasattr(resource, 'ec2_instance'): + clusters_map[cluster_infra_id].associated_resources["Instances"].append( + resource + ) + # Set resource under cluster + else: + clusters_map[cluster_infra_id].associated_resources["Resources"].append( + resource + ) + return clusters_map + + def _filter_deletable(self): + time_threshold = settings.aws.criteria.ocps.get("SLA") + for cluster in self._cluster_map.keys(): + resources = self._cluster_map[cluster].associated_resources.get("Resources") + instances = self._cluster_map[cluster].associated_resources.get("Instances") + leftover_ocp = False if instances: # For resources with associated EC2 Instances, filter by Instances SLA - if not filter_resources_by_time_modified( + if filter_resources_by_time_modified( time_threshold, resources=instances, ): - self._delete.extend(cluster_resources) + leftover_ocp = True + # If cluster is not selected due to other resources being used, + # the instances will only be printed in dry run + self._deletable["filtered_leftovers"].extend(instances) else: - # For resources with no associated EC2 Instances, identify as leftovers - self._delete.extend( - filter_resources_by_time_modified(time_threshold, resources=cluster_resources) - ) + # For resources with no associated EC2 Instances, consider as leftovers + leftover_ocp = True + + if leftover_ocp: + # Filter all cluster resources by SLA to avoid deletion of resources that are + # in use, like EBS volume or security groups + if filter_resources_by_time_modified(time_threshold, resources=resources): + # Will not collect resources recorded during the SLA time + self._deletable["ocp_clusters"].append(cluster) + self._deletable["filtered_leftovers"].extend(resources) + else: + logger.info( + f"Found resources in use, skipping the deletion of cluster {cluster}" + ) + + def list(self): + resources = [] + + ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""]) + for prefix in ocp_prefixes: + query = " ".join( + [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.cleaning_region}"] + ) + resources.extend(self.client.list_resources(query=query)) + + # Filter resources by SLA before deletion + self._cluster_map = self.group_ocps_by_cluster(resources=resources) + self._filter_deletable() - # Sort resources by type - self._delete = sorted(self._delete, key=lambda x: 
x.resource_type) + # Sort resources by type and cluster by name + self._deletable["filtered_leftovers"] = sorted( + self._deletable["filtered_leftovers"], key=lambda x: x.resource_type + ) + self._deletable["ocp_clusters"] = sorted(self._deletable["ocp_clusters"]) self._set_dry() diff --git a/cloudwash/providers/aws.py b/cloudwash/providers/aws.py index f9959827..205df156 100644 --- a/cloudwash/providers/aws.py +++ b/cloudwash/providers/aws.py @@ -13,6 +13,7 @@ def cleanup(**kwargs): is_dry_run = kwargs.get("dry_run", False) + user_validate = kwargs.get("yes", False) dry_data['PROVIDER'] = "AWS" regions = settings.aws.auth.regions all_data = [] @@ -30,7 +31,7 @@ def cleanup(**kwargs): for items in data: dry_data[items]['delete'] = [] logger.info(f"\nResources from the region: {region}") - awscleanup.ocps.cleanup() + awscleanup.ocps.cleanup(user_validation=user_validate) if is_dry_run: echo_dry(dry_data) all_data.append(deepcopy(dry_data)) diff --git a/cloudwash/utils.py b/cloudwash/utils.py index 60e3256d..e1cd4d7f 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -1,5 +1,8 @@ """Common utils for cleanup activities of all CRs""" import importlib.resources +import json +import os +import subprocess from collections import namedtuple from datetime import datetime @@ -18,10 +21,9 @@ from wrapanapi.systems.ec2 import ResourceExplorerResource from cloudwash.assets import css +from cloudwash.constants import OCP_TAG_SUBSTR from cloudwash.logger import logger -OCP_TAG_SUBSTR = "kubernetes.io/cluster/" - _vms_dict = {"VMS": {"delete": [], "stop": [], "skip": []}} _containers_dict = {"CONTAINERS": {"delete": [], "stop": [], "skip": []}} @@ -29,7 +31,7 @@ "NICS": {"delete": []}, "DISCS": {"delete": []}, "PIPS": {"delete": []}, - "OCPS": {"delete": []}, + "OCPS": {"delete": [], "clusters": []}, "RESOURCES": {"delete": []}, "STACKS": {"delete": []}, "IMAGES": {"delete": []}, @@ -62,12 +64,8 @@ def resourcewise_data(dry_data=None) -> dict: "deletable_pips": dry_data["PIPS"]["delete"] if "PIPS" in dry_data else None, "deletable_resources": dry_data["RESOURCES"]["delete"], "deletable_stacks": dry_data["STACKS"]["delete"] if "STACKS" in dry_data else None, - "deletable_ocps": { - ocp.resource_type: [ - r.name for r in dry_data["OCPS"]["delete"] if r.resource_type == ocp.resource_type - ] - for ocp in dry_data["OCPS"]["delete"] - }, + "clusters_ocps": dry_data["OCPS"]["clusters"], + "deletable_ocps": dry_data["OCPS"]["delete"], } return resource_data @@ -243,18 +241,19 @@ def group_ocps_by_cluster(resources: list = None) -> dict: for resource in resources: for key in resource.get_tags(regex=OCP_TAG_SUBSTR): - cluster_name = key.get("Key") - if OCP_TAG_SUBSTR in cluster_name: - cluster_name = cluster_name.split(OCP_TAG_SUBSTR)[1] - if cluster_name not in clusters_map.keys(): - clusters_map[cluster_name] = {"Resources": [], "Instances": []} + cluster_infra_id = key.get("Key") + if OCP_TAG_SUBSTR in cluster_infra_id: + # Considering the following format: "kubernetes.io/cluster/" + cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1] + if cluster_infra_id not in clusters_map.keys(): + clusters_map[cluster_infra_id] = {"Resources": [], "Instances": []} # Set cluster's EC2 instances if hasattr(resource, 'ec2_instance'): - clusters_map[cluster_name]["Instances"].append(resource) + clusters_map[cluster_infra_id]["Instances"].append(resource) # Set resource under cluster else: - clusters_map[cluster_name]["Resources"].append(resource) + clusters_map[cluster_infra_id]["Resources"].append(resource) 
     return clusters_map
 
@@ -283,26 +282,110 @@ def calculate_time_threshold(time_ref=""):
 def filter_resources_by_time_modified(
     time_threshold,
     resources: list[ResourceExplorerResource] = None,
-) -> list:
+) -> bool:
     """
     Filter list of AWS resources by checking modification date ("LastReportedAt")
 
-    :param datetime time_threshold: Time filtering criteria
+    :param str time_threshold: Time filtering criteria
     :param list resources: List of resources to be filtered out
-    :return: list of resources that last modified before time threshold
+    :return: True if all resources in the list were last modified before the time threshold
 
     :Example:
 
         Use the time_ref "1h" to collect resources that exist for more than an hour
     """
-    filtered_resources = []
+    time_threshold = calculate_time_threshold(time_ref=time_threshold)
+    return all(r.date_modified <= time_threshold for r in resources)
+
+
+def check_installer_exists():
+    try:
+        subprocess.run(
+            ['openshift-install', '--help'],
+            stdout=subprocess.DEVNULL,  # Suppress stdout
+            stderr=subprocess.DEVNULL,  # Suppress stderr
+            check=False,  # Ignore failure
+        )
+        logger.info("Found Openshift Installer")
+    except FileNotFoundError:
+        logger.exception(
+            "Openshift Installer CLI doesn't exist."
+            "\nUse a Docker container env for cleanup or install locally from: "
+            "https://mirror.openshift.com/pub/openshift-v4/x86_64/"
+            "clients/ocp/stable/openshift-install-linux.tar.gz"
+            "\nFor more information check out: https://github.com/openshift/installer."
+        )
+        exit(1)
+
+
+def destroy_ocp_cluster(metadata_path: str, cluster_name: str):
+    if metadata_path == "" or not os.path.exists(metadata_path):
+        # Return without raising exception, will try to fetch next OCP cluster info
+        logger.error(f"Failed to load cluster info from metadata path: {metadata_path}.")
+    else:
+        err_msg = f"Failed to clean up OCP cluster {cluster_name}. Failure info:"
+        cleanup_dir = metadata_path.split("metadata.json")[0]
+        env = os.environ.copy()
+        # if not env.get("AWS_ACCESS_KEY_ID"):
+        #     my_env["AWS_ACCESS_KEY_ID"] = settings.providers.ec2.username
+        #     my_env["AWS_SECRET_ACCESS_KEY"] = settings.providers.ec2.password
+        try:
+            logger.info(f"Starting to destroy OCP cluster: {cluster_name}")
+            result = subprocess.run(
+                [
+                    'openshift-install',
+                    'destroy',
+                    'cluster',
+                    '--dir',
+                    cleanup_dir,
+                    '--log-level=debug',
+                ],
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,  # Merge stderr into stdout
+                text=True,
+                check=False,  # Use check=True to raise an exception for non-zero return codes
+            )
+            if result.returncode != 0:
+                # Print logs from the openshift-installer cli
+                logger.error(f"{err_msg}\n{result.stdout}")
+            else:
+                logger.debug(result.stdout)
+                logger.info("Successfully completed.")
+        except Exception as ex:
+            # Catch output of the subprocess run error
+            logger.error(f"{err_msg}\n{ex}")
+
+
+def validate_deletion_with_user_input(cluster_name) -> bool:
+    while True:
+        # confirm with the user
+        user_input = input(f'Confirm destroy of cluster {cluster_name} [Y/N]: ')
+
+        # input validation
+        if user_input.lower() in ('y', 'yes'):
+            return True
+        elif user_input.lower() in ('n', 'no'):  # using this elif for readability
+            return False
+        else:
+            # ... error handling ...
+            print(f'Error: Input {user_input} unrecognised. 
Please try again.') + + +def destroy_ocp_cluster_wrapper(metadata_path: str, cluster_name: str, user_validation=False): + if user_validation: + destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) + else: + if validate_deletion_with_user_input(cluster_name): + destroy_ocp_cluster(metadata_path=metadata_path, cluster_name=cluster_name) + else: + logger.info(f"Skipping the deletion of the cluster: {cluster_name}\n") - for resource in resources: - # Will not collect resources recorded during the SLA time - if resource.date_modified > time_threshold: - continue - filtered_resources.append(resource) - return filtered_resources +def write_metadata_file(cluster_metadata: dict, cleanup_dir: str): + metadata_file = os.path.join(cleanup_dir, "metadata.json") + + # Write the JSON to the file + with open(metadata_file, "w") as f: + json.dump(cluster_metadata, f) -def delete_ocp(ocp): - # WIP: add support for deletion - pass + logger.debug(f"Metadata written to {metadata_file}") + return metadata_file
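
A minimal sketch of how the new helpers in this diff fit together, under stated assumptions: the module paths are the ones introduced above, while the infra_id/region values are made up for illustration. LeftoverAWSOcp.get_cluster_metadata() builds the dict that write_metadata_file() serialises to metadata.json, which openshift-install destroy cluster --dir then consumes via destroy_ocp_cluster().

import json
import tempfile

from cloudwash.entities.resources.ocps import LeftoverAWSOcp
from cloudwash.utils import write_metadata_file

# Hypothetical leftover cluster; infra_id and region are made-up values.
cluster = LeftoverAWSOcp(infra_id="mycluster-x7k2p", region="us-east-1")
cluster.get_cluster_info()                 # no associated resources -> no tags to read
metadata = cluster.get_cluster_metadata()  # name/ID fall back to the infra ID

with tempfile.TemporaryDirectory() as tmpdir:
    path = write_metadata_file(cluster_metadata=metadata, cleanup_dir=tmpdir)
    with open(path) as f:
        print(json.dumps(json.load(f), indent=2))
    # Expected shape of metadata.json (values depend on the tags found):
    # {
    #   "clusterName": "mycluster-x7k2p",
    #   "clusterID": "mycluster-x7k2p",
    #   "infraID": "mycluster-x7k2p",
    #   "aws": {
    #     "region": "us-east-1",
    #     "identifier": [{"kubernetes.io/cluster/mycluster-x7k2p": "owned"}]
    #   }
    # }
    # destroy_ocp_cluster() then runs
    # `openshift-install destroy cluster --dir <tmpdir> --log-level=debug`
    # against this directory.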