-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleanup_summary.py
More file actions
executable file
·134 lines (116 loc) · 4.78 KB
/
cleanup_summary.py
File metadata and controls
executable file
·134 lines (116 loc) · 4.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
import os
import pwd
import grp
from datetime import datetime
import heapq
import sys
from itertools import count
import argparse
def human_readable_size(size_bytes):
    """Format a byte count as a string with a 1024-based unit suffix.

    Zero is rendered as the literal ``"0 B"``. Any other value is divided by
    1024 until it drops below 1024 or the largest unit (PB) is reached, then
    printed with two decimal places followed by the unit.
    """
    if size_bytes == 0:
        return "0 B"

    *scalable_units, largest_unit = ("B", "KB", "MB", "GB", "TB", "PB")
    amount = size_bytes
    for suffix in scalable_units:
        # Stop at the first unit in which the value no longer reaches 1024.
        if not (amount >= 1024):
            return f"{amount:.2f} {suffix}"
        amount = amount / 1024.0

    # Ran out of smaller units: report whatever is left in the largest one.
    return f"{amount:.2f} {largest_unit}"
def safe_get_owner(uid):
    """Return the user name for *uid*, or ``str(uid)`` if the lookup fails.

    The fallback is used when ``pwd.getpwuid`` raises ``KeyError``, i.e. the
    password database has no entry for the id.
    """
    try:
        entry = pwd.getpwuid(uid)
    except KeyError:
        return str(uid)
    return entry.pw_name
def safe_get_group(gid):
    """Return the group name for *gid*, or ``str(gid)`` if the lookup fails.

    The fallback is used when ``grp.getgrgid`` raises ``KeyError``, i.e. the
    group database has no entry for the id.
    """
    try:
        entry = grp.getgrgid(gid)
    except KeyError:
        return str(gid)
    return entry.gr_name
def get_file_info(path):
    """Collect ownership, size and timestamp metadata for *path*.

    The path is stat'ed without following symlinks. On success a dict is
    returned with the keys ``path``, ``owner``, ``group``, ``size``,
    ``ctime``, ``mtime``, ``atime`` (the three times as ``datetime`` objects)
    and ``recent_time`` (the latest of those three). If anything at all goes
    wrong while gathering the data, ``None`` is returned instead.
    """
    try:
        stat_result = os.stat(path, follow_symlinks=False)
        record = {
            "path": path,
            "owner": safe_get_owner(stat_result.st_uid),
            "group": safe_get_group(stat_result.st_gid),
            "size": stat_result.st_size,
        }
        # Convert the raw st_* timestamps into datetime objects.
        for field in ("ctime", "mtime", "atime"):
            raw_seconds = getattr(stat_result, "st_" + field)
            record[field] = datetime.fromtimestamp(raw_seconds)
        record["recent_time"] = max(
            record["atime"], record["mtime"], record["ctime"]
        )
        return record
    except Exception:
        # Best effort: an unreadable or vanished entry is reported as None.
        return None
def _push_bounded(heap, entry, limit):
    """Push *entry* onto the min-heap *heap*, keeping at most *limit* entries."""
    heapq.heappush(heap, entry)
    if len(heap) > limit:
        # Evict the smallest key so only the highest-keyed entries survive.
        heapq.heappop(heap)


def _print_file_entries(heap):
    """Print the detail block for every heap entry, highest key first."""
    for _, _, info in sorted(heap, key=lambda entry: entry[0], reverse=True):
        print("----------------------------------------")
        print(f"File: {info['path']}")
        print(f"Size: {human_readable_size(info['size'])}")
        print(f"Owner: {info['owner']}, Group: {info['group']}")
        # st_ctime is the metadata-change time on Unix, not a creation time,
        # so it is labelled "Changed" rather than "Created".
        print(f"Changed: {info['ctime']}, Modified: {info['mtime']}, Accessed: {info['atime']}")


def summarize_directory(root_path, top_n_largest=3, top_n_recent=5):
    """Walk *root_path* and print a usage summary to stdout.

    The report contains the number of subdirectories and files found, the
    summed file size, the latest access time seen on any file, the
    *top_n_largest* largest files and the *top_n_recent* files with the
    latest access/modify/change time. Symlinked directories are not
    descended into or counted, symlinked files are skipped, and files whose
    metadata cannot be read are ignored.

    Args:
        root_path: Directory to scan.
        top_n_largest: How many of the largest files to list.
        top_n_recent: How many of the most recently used files to list.

    Raises:
        SystemExit: With status 1 (after writing a message to stderr) when
            *root_path* is not a directory.
    """
    if not os.path.isdir(root_path):
        # Diagnostics go to stderr so stdout only ever carries the report.
        print(f"Error: Directory {root_path} does not exist.", file=sys.stderr)
        sys.exit(1)

    largest_files_heap = []
    recent_files_heap = []
    # Monotonic tie-breaker: keeps heap tuples comparable when keys are equal,
    # so the info dicts themselves are never compared.
    counter = count()
    total_files = 0
    total_dirs = 0
    total_size = 0
    most_recent_access = None

    for dirpath, dirnames, filenames in os.walk(root_path, followlinks=False):
        # Skip symlinked directories (in-place edit also prunes the walk).
        dirnames[:] = [
            d for d in dirnames
            if not os.path.islink(os.path.join(dirpath, d))
        ]
        total_dirs += len(dirnames)

        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            if os.path.islink(fpath):
                continue
            info = get_file_info(fpath)
            if info is None:
                continue

            total_files += 1
            total_size += info["size"]

            # Track most recent access across all files.
            if most_recent_access is None or info["atime"] > most_recent_access:
                most_recent_access = info["atime"]

            # Track largest files.
            _push_bounded(
                largest_files_heap,
                (info["size"], next(counter), info),
                top_n_largest,
            )

            # Track most recently used files; get_file_info already provides
            # the latest of atime/mtime/ctime as "recent_time".
            _push_bounded(
                recent_files_heap,
                (info["recent_time"].timestamp(), next(counter), info),
                top_n_recent,
            )

    # Summary
    print(f"\nSummary for directory: {root_path}")
    print(f"Total subdirectories: {total_dirs}")
    print(f"Total files: {total_files}")
    print(f"Total disk usage: {human_readable_size(total_size)}")
    if most_recent_access is not None:
        print(f"Most recent access time (any file): {most_recent_access}")

    # Largest files sorted descending
    print(f"\nTop {top_n_largest} largest files:")
    _print_file_entries(largest_files_heap)

    # Most recently used files sorted descending
    print(f"\nTop {top_n_recent} most recently used files:")
    _print_file_entries(recent_files_heap)
if __name__ == "__main__":
    # Command-line entry point: one positional root directory plus two
    # optional integer limits, handed straight to summarize_directory.
    cli = argparse.ArgumentParser(
        description="Directory summary with largest, recent, and most recent access time"
    )
    cli.add_argument("root", help="Root directory to scan")
    cli.add_argument(
        "-n",
        "--largest",
        type=int,
        default=3,
        help="Number of largest files to show (default 3)",
    )
    cli.add_argument(
        "-r",
        "--recent",
        type=int,
        default=5,
        help="Number of most recently used files to show (default 5)",
    )
    options = cli.parse_args()
    summarize_directory(options.root, options.largest, options.recent)