8 changes: 8 additions & 0 deletions Dockerfile.dev
@@ -21,13 +21,21 @@ RUN git clone --depth=1 https://github.com/RedHatQE/cloudwash.git && \

RUN /bin/bash -c 'cd ${CLOUDWASH_DIR}; for conffile in conf/*.yaml.template; do cp -- "$conffile" "${conffile%.yaml.template}.yaml"; done'

# Install the openshift-install CLI
RUN curl -kf https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-install-linux.tar.gz \
    -o openshift-install-linux.tar.gz && \
    tar zxf openshift-install-linux.tar.gz && \
    chmod +x openshift-install

# Source .profile from the activate script so environment variables are kept between shell sessions
RUN echo "source ${APP_ROOT}/.profile" >> ${APP_ROOT}/bin/activate && touch ${APP_ROOT}/.profile

# Arbitrary UID handling, starting from the virtualenv directory, for pip permissions
USER 0
RUN fix-permissions ${APP_ROOT} -P && \
    git config --global --add safe.directory ${CLOUDWASH_DIR}
RUN touch /usr/local/bin/ && cp openshift-install /usr/local/bin/

USER 1001
WORKDIR "${CLOUDWASH_DIR}"
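
The installer is copied into /usr/local/bin so it lands on the image's PATH. The check_installer_exists helper imported by ocps.py below presumably verifies this at runtime; a minimal sketch of such a check, assuming it only needs to confirm the binary is reachable (the real implementation lives in cloudwash/utils.py, outside this diff):

import shutil

def check_installer_exists() -> None:
    """Fail fast when the openshift-install CLI is not on PATH."""
    # Hypothetical sketch; cloudwash's actual helper may differ.
    if shutil.which("openshift-install") is None:
        raise FileNotFoundError("openshift-install not found on PATH")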

4 changes: 3 additions & 1 deletion cloudwash/cli.py
@@ -105,8 +105,9 @@ def azure(ctx, vms, discs, nics, images, pips, _all, _all_rg):
    is_flag=True,
    help="Remove only unused OCP Cluster occupied resources from the provider",
)
@click.option("-y", "--yes", is_flag=True, help="Answer yes to all prompts")
Collaborator:

If we don't have a choice to say no, or if we should not provide the choice, let's remove the option here.

Collaborator (Author):

The alternative is -d; saying no to all cluster deletions would make the deletion mode redundant.

In deletion mode, if you don't pass -y (yes to all prompts), you can go cluster by cluster and decide which ones to clean up. You are still in deletion mode, but you can safely exclude some clusters.

Collaborator:

I think the framework Cloudwash provides for exclusion is to put the resource in the exceptions list. That means the user has to pre-choose the clusters they don't want to delete resources from.

Can we move to the exceptions mode? How would that impact your use case?
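
For context on the behavior under discussion: with the new flag, the per-cluster prompt could be gated roughly as below (illustrative sketch only; the actual prompt lives in destroy_ocp_cluster_wrapper in cloudwash/utils.py, which this diff does not show):

import click

def confirm_cluster_deletion(cluster_name: str, yes: bool) -> bool:
    """Return True when the leftover cluster should be destroyed."""
    if yes:  # -y/--yes answers every prompt affirmatively
        return True
    # Otherwise ask interactively, cluster by cluster.
    return click.confirm(f"Destroy leftover cluster {cluster_name}?")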

@click.pass_context
def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all):
def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, yes, _all):
    # Validate Amazon Settings
    validate_provider(ctx.command.name)
    is_dry_run = ctx.parent.params["dry"]
@@ -118,6 +119,7 @@ def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all):
        pips=pips,
        stacks=stacks,
        ocps=ocps,
        yes=yes,
        _all=_all,
        dry_run=is_dry_run,
    )
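
Taken together, the new flag flows cli.py -> providers/aws.py -> entities/resources/ocps.py: the aws command collects yes, the provider's cleanup() reads it via kwargs.get("yes", False), and CleanAWSOcps.cleanup(user_validation=...) carries it down to the destroy wrapper.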
12 changes: 12 additions & 0 deletions cloudwash/constants.py
@@ -3,3 +3,15 @@
gce_data = ['VMS', 'NICS', 'DISCS']
vmware_data = ['VMS', 'NICS', 'DISCS']
container_data = ['CONTAINERS']

# OCP resource tags for filtering
OCP_TAG_SUBSTR = "kubernetes.io/cluster/"
CLUSTER_NAME_TAGS = [
"clusterName",
"api.openshift.com/name",
]
CLUSTER_ID_TAGS = [
"openshiftClusterID",
"api.openshift.com/id",
]
CLUSTER_EXP_DATE_TAG = "expirationDate"
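
To illustrate how these constants are consumed (tag values invented for the example, and get_tag_value here is a local stand-in for the resource method used in ocps.py): cluster resources carry AWS-style Key/Value tags, and the name, ID, and expiration are extracted by trying each candidate tag key in order:

# Invented example tags on a leftover cluster resource:
tags = [
    {"Key": "kubernetes.io/cluster/mycluster-abc12", "Value": "owned"},
    {"Key": "clusterName", "Value": "mycluster"},
    {"Key": "expirationDate", "Value": "2024-01-01T00:00:00Z"},
]

def get_tag_value(tags: list, key: str) -> str:
    """First matching tag value, or '' when the key is absent."""
    return next((t["Value"] for t in tags if t["Key"] == key), "")

# Try each known name tag until one matches; mirrors CLUSTER_NAME_TAGS above.
cluster_name = next(
    filter(None, (get_tag_value(tags, k) for k in ["clusterName", "api.openshift.com/name"])),
    "",
)
assert cluster_name == "mycluster"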
214 changes: 183 additions & 31 deletions cloudwash/entities/resources/ocps.py
@@ -1,63 +1,215 @@
import tempfile

from cloudwash.config import settings
from cloudwash.constants import CLUSTER_EXP_DATE_TAG
from cloudwash.constants import CLUSTER_ID_TAGS
from cloudwash.constants import CLUSTER_NAME_TAGS
from cloudwash.constants import OCP_TAG_SUBSTR
from cloudwash.entities.resources.base import OCPsCleanup
from cloudwash.utils import calculate_time_threshold
from cloudwash.logger import logger
from cloudwash.utils import check_installer_exists
from cloudwash.utils import destroy_ocp_cluster_wrapper
from cloudwash.utils import dry_data
from cloudwash.utils import filter_resources_by_time_modified
from cloudwash.utils import group_ocps_by_cluster
from cloudwash.utils import OCP_TAG_SUBSTR
from cloudwash.utils import write_metadata_file


class LeftoverAWSOcp:
    def __init__(self, infra_id: str, region: str):
        self.infra_id = infra_id
        self.region = region
        self.associated_resources = {"Resources": [], "Instances": []}
        self._cluster_name = ""  # Extracted from resource tags
        self._cluster_id = ""  # Extracted from resource tags
        self._expiration_date = ""  # Extracted from resource tags

    def __repr__(self):
        return (
            f'{self.infra_id}, Region: {self.region}, Instances: '
            f'{len(self.associated_resources.get("Instances"))}, other resources: '
            f'{len(self.associated_resources.get("Resources"))}'
        )

    def get_cluster_info(self):
        for resource_types in self.associated_resources.values():
            for resource in resource_types:
                # Stop scanning once all three attributes have been found
                if all([self._cluster_id, self._cluster_name, self._expiration_date]):
                    return
                if not self._expiration_date:
                    exp_date = resource.get_tag_value(key=CLUSTER_EXP_DATE_TAG)
                    if exp_date:
                        self._expiration_date = exp_date
                for name in CLUSTER_NAME_TAGS:
                    if not self._cluster_name:
                        name_tag = resource.get_tag_value(key=name)
                        if name_tag:
                            self._cluster_name = name_tag
                for id_key in CLUSTER_ID_TAGS:
                    if not self._cluster_id:
                        id_tag = resource.get_tag_value(key=id_key)
                        if id_tag:
                            self._cluster_id = id_tag

    def get_cluster_metadata(self):
        """
        TODO Complete
        TODO Check if we can extract HostedZoneRole, clusterDomain
        """
        # Prepare the data
        infraID = self.infra_id
        clusterName = self._cluster_name or infraID
        clusterID = self._cluster_id or infraID

        logger.info(f"\nPreparing metadata for cluster: {infraID}")

        cluster_metadata = {
            "clusterName": f"{clusterName}",
            "clusterID": f"{clusterID}",
            "infraID": f"{infraID}",
            "aws": {
                "region": self.region,
                "identifier": [{f"{OCP_TAG_SUBSTR}{infraID}": "owned"}],
            },
        }
        return cluster_metadata
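
For illustration, the metadata produced for a leftover cluster would look like this (all values invented); openshift-install consumes it as metadata.json when destroying a cluster:

import json

example_metadata = {
    "clusterName": "mycluster",       # falls back to the infra ID when the tag is missing
    "clusterID": "mycluster-abc12",   # likewise falls back to the infra ID
    "infraID": "mycluster-abc12",
    "aws": {
        "region": "us-east-1",
        "identifier": [{"kubernetes.io/cluster/mycluster-abc12": "owned"}],
    },
}
print(json.dumps(example_metadata, indent=2))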


class CleanOCPs(OCPsCleanup):
    def __init__(self, client):
        self.client = client
        self._delete = []
    def __init__(self):
        self._deletable = {"ocp_clusters": [], "filtered_leftovers": []}
Collaborator:

I think it's good to be consistent with the variable names, that is self._delete = {}.
        self._cluster_map = {}
        self.list()

    def _set_dry(self):
        dry_data['OCPS']['delete'] = self._delete
        def _make_printable(resources: list):
            return {
                ocp.resource_type: [
                    r.name for r in resources if r.resource_type == ocp.resource_type
                ]
                for ocp in resources
            }

        dry_data['OCPS']['delete'] = _make_printable(self._deletable["filtered_leftovers"])
        dry_data['OCPS']['clusters'] = self._deletable["ocp_clusters"]
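
With these names, a dry run would report a payload shaped roughly like this (resource types and names invented for illustration):

# dry_data['OCPS'] = {
#     'delete': {'instance': ['i-0abc123'], 'volume': ['vol-0def456']},
#     'clusters': ['mycluster-abc12'],
# }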

    def list(self):
        pass

    def remove(self):
        pass

    def cleanup(self):
    def cleanup(self, user_validation=False):
        if not settings.dry_run:
            self.remove()
            check_installer_exists()
            with tempfile.TemporaryDirectory() as tmpdir:
                for cluster_name in self._deletable["ocp_clusters"]:
                    cluster = self._cluster_map[cluster_name]
                    cluster.get_cluster_info()
                    cluster.metadata = cluster.get_cluster_metadata()
                    metadata_path = write_metadata_file(
                        cluster_metadata=cluster.metadata, cleanup_dir=tmpdir
                    )
                    destroy_ocp_cluster_wrapper(
                        metadata_path=metadata_path,
                        cluster_name=cluster_name,
                        user_validation=user_validation,
                    )
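
write_metadata_file and destroy_ocp_cluster_wrapper come from cloudwash/utils.py, which is outside this diff. A plausible sketch of the former, assuming it only needs to drop the metadata where openshift-install expects a metadata.json (names and layout are assumptions):

import json
import os

def write_metadata_file(cluster_metadata: dict, cleanup_dir: str) -> str:
    """Write metadata.json into a per-cluster directory and return its path."""
    cluster_dir = os.path.join(cleanup_dir, cluster_metadata["infraID"])
    os.makedirs(cluster_dir, exist_ok=True)
    metadata_path = os.path.join(cluster_dir, "metadata.json")
    with open(metadata_path, "w") as f:
        json.dump(cluster_metadata, f)
    return metadata_path

The wrapper then presumably shells out to openshift-install destroy cluster against that directory, honoring user_validation for the per-cluster prompt.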


class CleanAWSOcps(CleanOCPs):
    def list(self):
        resources = []
        time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.get("SLA"))
    def __init__(self, client):
        self.client = client
        self.cleaning_region = self.client.cleaning_region
        super().__init__()
Collaborator:

Doesn't seem to be required.


        ocp_prefix = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""])
        for prefix in ocp_prefix:
            query = " ".join(
                [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.client.cleaning_region}"]
            )
            resources.extend(self.client.list_resources(query=query))

        # Prepare resources to be filtered before deletion
        cluster_map = group_ocps_by_cluster(resources=resources)
        for cluster_name in cluster_map.keys():
            cluster_resources = cluster_map[cluster_name].get("Resources")
            instances = cluster_map[cluster_name].get("Instances")
    def group_ocps_by_cluster(self, resources: list = None) -> dict:
        """Group different types of AWS resources under their original OCP clusters

        :param list resources: AWS resources collected by the defined region and SLA
        :return: A dictionary with the clusters as keys and the associated resources as values
        """
        if resources is None:
            resources = []
        clusters_map = {}

        for resource in resources:
            for key in resource.get_tags(regex=OCP_TAG_SUBSTR):
                cluster_infra_id = key.get("Key")
                if OCP_TAG_SUBSTR in cluster_infra_id:
                    # Considering the following format: "kubernetes.io/cluster/<CLUSTER_INFRA_ID>"
                    cluster_infra_id = cluster_infra_id.split(OCP_TAG_SUBSTR)[1]
                    if cluster_infra_id not in clusters_map.keys():
                        clusters_map[cluster_infra_id] = LeftoverAWSOcp(
                            infra_id=cluster_infra_id, region=self.cleaning_region
                        )

                    # Set cluster's EC2 instances
                    if hasattr(resource, 'ec2_instance'):
                        clusters_map[cluster_infra_id].associated_resources["Instances"].append(
                            resource
                        )
                    # Set resource under cluster
                    else:
                        clusters_map[cluster_infra_id].associated_resources["Resources"].append(
                            resource
                        )
        return clusters_map
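
The tag-key convention this relies on, in miniature (infra ID invented):

OCP_TAG_SUBSTR = "kubernetes.io/cluster/"
key = "kubernetes.io/cluster/mycluster-abc12"
infra_id = key.split(OCP_TAG_SUBSTR)[1]
assert infra_id == "mycluster-abc12"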

    def _filter_deletable(self):
        time_threshold = settings.aws.criteria.ocps.get("SLA")
        for cluster in self._cluster_map.keys():
            resources = self._cluster_map[cluster].associated_resources.get("Resources")
            instances = self._cluster_map[cluster].associated_resources.get("Instances")
            leftover_ocp = False

            if instances:
                # For resources with associated EC2 Instances, filter by Instances SLA
                if not filter_resources_by_time_modified(
                if filter_resources_by_time_modified(
                    time_threshold,
                    resources=instances,
                ):
                    self._delete.extend(cluster_resources)
                    leftover_ocp = True
                    # If the cluster is not selected due to other resources being in use,
                    # the instances will only be printed in the dry run
                    self._deletable["filtered_leftovers"].extend(instances)
            else:
                # For resources with no associated EC2 Instances, identify as leftovers
                self._delete.extend(
                    filter_resources_by_time_modified(time_threshold, resources=cluster_resources)
                )
                # For resources with no associated EC2 Instances, consider them leftovers
                leftover_ocp = True

            if leftover_ocp:
                # Filter all cluster resources by SLA to avoid deleting resources that are
                # in use, like EBS volumes or security groups
                if filter_resources_by_time_modified(time_threshold, resources=resources):
                    # Will not collect resources recorded during the SLA time
                    self._deletable["ocp_clusters"].append(cluster)
                    self._deletable["filtered_leftovers"].extend(resources)
                else:
                    logger.info(
                        f"Found resources in use, skipping the deletion of cluster {cluster}"
                    )
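
filter_resources_by_time_modified is an existing cloudwash utility not shown in this diff. A hedged sketch of its apparent contract here, assuming an SLA threshold and resources that expose a last-modified timestamp (units and attribute names are assumptions):

from datetime import datetime, timedelta, timezone

def filter_resources_by_time_modified(time_threshold, resources=None):
    """Return only the resources last modified before the SLA cutoff."""
    resources = resources or []
    # Assumption: time_threshold is an SLA in minutes and each resource
    # exposes a timezone-aware time_modified attribute.
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=time_threshold)
    return [r for r in resources if r.time_modified < cutoff]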

    def list(self):
        resources = []

        ocp_prefixes = list(settings.aws.criteria.ocps.get("OCP_PREFIXES") or [""])
        for prefix in ocp_prefixes:
            query = " ".join(
                [f"tag.key:{OCP_TAG_SUBSTR}{prefix}*", f"region:{self.cleaning_region}"]
            )
            resources.extend(self.client.list_resources(query=query))

        # Filter resources by SLA before deletion
        self._cluster_map = self.group_ocps_by_cluster(resources=resources)
        self._filter_deletable()

        # Sort resources by type
        self._delete = sorted(self._delete, key=lambda x: x.resource_type)
        # Sort resources by type and clusters by name
        self._deletable["filtered_leftovers"] = sorted(
            self._deletable["filtered_leftovers"], key=lambda x: x.resource_type
        )
        self._deletable["ocp_clusters"] = sorted(self._deletable["ocp_clusters"])
        self._set_dry()
3 changes: 2 additions & 1 deletion cloudwash/providers/aws.py
@@ -13,6 +13,7 @@

def cleanup(**kwargs):
    is_dry_run = kwargs.get("dry_run", False)
    user_validate = kwargs.get("yes", False)
    dry_data['PROVIDER'] = "AWS"
    regions = settings.aws.auth.regions
    all_data = []
@@ -30,7 +31,7 @@ def cleanup(**kwargs):
            for items in data:
                dry_data[items]['delete'] = []
            logger.info(f"\nResources from the region: {region}")
            awscleanup.ocps.cleanup()
            awscleanup.ocps.cleanup(user_validation=user_validate)
            if is_dry_run:
                echo_dry(dry_data)
                all_data.append(deepcopy(dry_data))
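
Assuming the swach entry point documented in the cloudwash README, a dry run of this path would look something like swach --dry aws --ocps, while swach aws --ocps -y would destroy every qualifying leftover cluster without per-cluster prompts.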