Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ These include:
- Move_it_server and move_it_client
- Trollstalker (Pytroll-watchers should be prefered if possible)
- S3downloader
- Remove_it


.. toctree::
Expand All @@ -22,7 +23,7 @@ These include:

fetcher
s3downloader

remove_it

Indices and tables
==================
Expand Down
16 changes: 16 additions & 0 deletions docs/source/remove_it.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Remove_it
=========

Remove_it is a script that is made to clean directories, and optionally publish messages about the removed files.

An example config would look like::

[my_cleaning_job]
base_dir=/some/path/to/clean
templates=*
stat_time_method=st_mtime
recursive=true
include_hidden=false

Even if "include_hidden" is set to "true", ".keep" files will never be removed (useful to avoid directories from being
cleaned up)
10 changes: 7 additions & 3 deletions trollmoves/filescleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(self, publisher, section, info, dry_run=True):
self.info = info
self.dry_run = dry_run
self.recursive = self.info.get("recursive", False)
self.include_hidden = self.info.get("include_hidden", False)
self.stat_time_method = self.info.get("stat_time_method", "st_ctime")

def clean_dir(self, ref_time, pathname_template, **kwargs):
Expand All @@ -49,16 +50,17 @@ def clean_dir(self, ref_time, pathname_template, **kwargs):
LOGGER.info("Cleaning under %s", pathname_template)

if not self.recursive:
filepaths = glob(pathname_template)
filepaths = glob(pathname_template, include_hidden=self.include_hidden)
return self.clean_files_and_dirs(filepaths, ref_time)

section_files = 0
section_size = 0
removed = []

for pathname in glob(pathname_template):
for pathname in glob(pathname_template, include_hidden=self.include_hidden):
for dirpath, _dirnames, _ in os.walk(Path(pathname).parent, followlinks=True):
files_in_dir = glob(os.path.join(dirpath, Path(pathname_template).name))
files_in_dir = glob(os.path.join(dirpath, Path(pathname_template).name),
include_hidden=self.include_hidden)

if len(files_in_dir) == 0:
self._remove_empty_directory(dirpath)
Expand All @@ -83,6 +85,8 @@ def clean_files_and_dirs(self, filepaths, ref_time):
except OSError:
LOGGER.warning("Couldn't stat path=%s", str(filepath))
continue
if filepath.endswith(".keep"):
continue

if dt.datetime.fromtimestamp(getattr(stat, self.stat_time_method), tz=dt.timezone.utc) < ref_time:
if not self.dry_run:
Expand Down
91 changes: 81 additions & 10 deletions trollmoves/tests/test_remove_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,18 +257,31 @@
os.utime(fn.parent, times=(atime, mtime))
filepaths.append(fn)

# create …/data/another_subdir/subsubdir2/subsubsub/dummy5.dat
# where …/data/another_subdir/subsubdir2 symlinks to …/data_to_link

real_dir = tmp_path_factory.mktemp("data_to_link")
fn = basedir / "another_subdir" / "subsubdir2"
os.symlink(real_dir, fn)
# fn.mkdir()
symfn = fn / "dummy5.dat"
fn = real_dir / "dummy5.dat"
dir_name = basedir / "another_subdir" / "subsubdir2"
os.symlink(real_dir, dir_name)
symfn = dir_name / "subsubsub" / "dummy5.dat"
file5 = real_dir / "subsubsub" / "dummy5.dat"

Check notice on line 267 in trollmoves/tests/test_remove_files.py

View check run for this annotation

codefactor.io / CodeFactor

trollmoves/tests/test_remove_files.py#L267

Multiple spaces before operator. (E221)
file6 = real_dir / "subsubsub" / ".dummy6.dat"

Check notice on line 268 in trollmoves/tests/test_remove_files.py

View check run for this annotation

codefactor.io / CodeFactor

trollmoves/tests/test_remove_files.py#L268

Multiple spaces before operator. (E221)
file5.parent.mkdir()
file5.write_text(DUMMY_CONTENT)
os.utime(file5, times=(atime, mtime))
os.utime(file5.parent, times=(atime, mtime))
file6.write_text(DUMMY_CONTENT)
os.utime(file6, times=(atime, mtime))
os.utime(file6.parent, times=(atime, mtime))
filepaths.append(symfn)

# Make a sub-directory to keep:
fn = basedir / "subdir_to_keep" / ".keep"
fn.parent.mkdir()
fn.write_text(DUMMY_CONTENT)
os.utime(fn, times=(atime, mtime))
os.utime(fn.parent, times=(atime, mtime))
filepaths.append(symfn)

filepaths.append(fn)
return filepaths


Expand Down Expand Up @@ -398,14 +411,72 @@
"to": "some_users@xxx.yy",
"subject": "Cleanup Error on {hostname}",
"base_dir": f"{basedir}",
"templates": f"{basedir}/*",
"templates": "*",
"stat_time_method": "st_mtime",
"recursive": True,
"hours": "1"}

fcleaner = FilesCleaner(pub, section, info, dry_run=True)
fcleaner = FilesCleaner(pub, section, info, dry_run=False)

res = fcleaner.clean_section()

section_size, section_files, removed_files = res
assert str(basedir / "another_subdir" / "subsubdir2" / "subsubsub" / "dummy5.dat") in removed_files
# non-empty subdir
assert (basedir / "another_subdir" / "subsubdir2" / "subsubsub").exists()


def test_clean_removes_hidden_files(dummy_tree_of_some_files, tmp_path):
"""Test that cleaning follows links."""
pub = FakePublisher()
list_of_files_to_clean = dummy_tree_of_some_files

basedir = list_of_files_to_clean[0].parent

section = "mytest_files1"
info = {"mailhost": "localhost",
"to": "some_users@xxx.yy",
"subject": "Cleanup Error on {hostname}",
"base_dir": f"{basedir}",
"templates": "*",
"stat_time_method": "st_mtime",
"recursive": True,
"include_hidden": True,
"hours": "1"}

fcleaner = FilesCleaner(pub, section, info, dry_run=False)

res = fcleaner.clean_section()

section_size, section_files, removed_files = res
assert str(basedir / "another_subdir" / "subsubdir2" / "subsubsub" / ".dummy6.dat") in removed_files
# empty subdir, remove
assert not (basedir / "another_subdir" / "subsubdir2" / "subsubsub").exists()
# link, should not be removed
assert (basedir / "another_subdir" / "subsubdir2").exists()


def test_clean_keeps_keep_files(dummy_tree_of_some_files, tmp_path):

pub = FakePublisher()
list_of_files_to_clean = dummy_tree_of_some_files

basedir = list_of_files_to_clean[0].parent

section = "mytest_files1"
info = {"mailhost": "localhost",
"to": "some_users@xxx.yy",
"subject": "Cleanup Error on {hostname}",
"base_dir": f"{basedir}",
"templates": "*",
"stat_time_method": "st_mtime",
"recursive": True,
"include_hidden": True,
"hours": "1"}

fcleaner = FilesCleaner(pub, section, info, dry_run=False)

res = fcleaner.clean_section()

section_size, section_files, removed_files = res
assert str(basedir / "another_subdir" / "subsubdir2" / "dummy5.dat") in removed_files
assert (basedir / "subdir_to_keep").exists()
Loading
Loading