-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_log.py
More file actions
161 lines (137 loc) · 5.38 KB
/
parse_log.py
File metadata and controls
161 lines (137 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os
import re
import csv
import time
import shutil
# Extract "name@k: value" pairs from a single metrics line.
def parse_metrics(line):
    """Parse a metric string such as 'map@5: 0.123 recall@10: 0.456'
    into a dict mapping 'name@k' -> float value."""
    return {
        name: float(score)
        for name, score in re.findall(r'(\w+@\d+):\s*([\d\.]+)', line)
    }
# Parse a single log file.
def parse_log_file(file_path):
    """Extract the Valid/Test metric lines following the last BEST marker.

    Returns {'valid': {...}, 'test': {...}} on success, or None when the
    log is incomplete (no BEST marker, or Valid/Test lines missing after it).
    """
    with open(file_path, 'r') as f:
        lines = f.readlines()

    # Scan backwards for the last BEST marker.
    best_line = next(
        (i for i in range(len(lines) - 1, -1, -1)
         if '█████████████ BEST ████████████████' in lines[i]),
        -1,
    )
    if best_line == -1:
        return None  # No BEST flag, skip

    # Keep the LAST Valid/Test lines found at or after the marker.
    valid_line = None
    test_line = None
    for candidate in lines[best_line:]:
        if 'Valid:' in candidate:
            valid_line = candidate
        if 'Test:' in candidate:
            test_line = candidate
    if not valid_line or not test_line:
        return None  # Missing valid/test line

    return {
        'valid': parse_metrics(valid_line),
        'test': parse_metrics(test_line),
    }
def move_incomplete_logs(log_dir, temp_dir='temp_log'):
    """
    Check and move incomplete log files to the temp_log folder.

    A .log file is considered an incomplete training run when it has no
    BEST flag and its last modification time is more than 30 minutes ago
    (recently modified files may belong to a still-running job and are
    left alone).

    Returns the list of file names that were moved.
    """
    # Ensure the temp_log folder exists (no-op when it already does).
    os.makedirs(temp_dir, exist_ok=True)
    current_time = time.time()
    moved_files = []
    for filename in os.listdir(log_dir):
        if not filename.endswith('.log'):
            continue
        file_path = os.path.join(log_dir, filename)
        # Skip files modified within the last 30 minutes (1800 seconds):
        # their training run may still be writing to them.
        if current_time - os.path.getmtime(file_path) <= 1800:
            continue
        # Check for the BEST flag that marks a completed run.
        try:
            with open(file_path, 'r') as f:
                has_best = '█████████████ BEST ████████████████' in f.read()
        except Exception as e:
            # BUG FIX: message previously printed the literal "(unknown)"
            # instead of naming the file that failed.
            print(f'[ERROR] Failed to read {filename}: {e}')
            continue
        # If there is no BEST flag, move it to temp_log.
        if not has_best:
            temp_file_path = os.path.join(temp_dir, filename)
            try:
                shutil.move(file_path, temp_file_path)
                moved_files.append(filename)
                print(f'[MOVED] {filename} -> {temp_dir}/ (incomplete training)')
            except Exception as e:
                print(f'[ERROR] Failed to move {filename}: {e}')
    return moved_files
# Parse all log files.
def parse_all_logs(log_dir):
    """Parse every completed .log file in log_dir.

    Returns a list of (model_name, parsed) tuples, where model_name is the
    file name with its '.log' suffix removed. Logs that parse_log_file
    reports as incomplete are skipped with a warning.
    """
    results = []
    for filename in os.listdir(log_dir):
        if filename.endswith('.log'):
            file_path = os.path.join(log_dir, filename)
            parsed = parse_log_file(file_path)
            if parsed:
                # BUG FIX: strip only the trailing '.log' suffix;
                # replace() would also remove '.log' mid-name.
                results.append((filename[:-len('.log')], parsed))
            else:
                # BUG FIX: message previously printed the literal
                # "(unknown)" instead of the skipped file's name.
                print(f'[WARN] Skipped (incomplete): {filename}')
    return results
# Order metric names for stable CSV columns.
def sort_metrics(metrics):
    """
    Group metrics by type and sort by the number after @
    e.g., map@5, map@10, recall@5, recall@10
    """
    def ordering(metric_name):
        parsed = re.match(r'(\D+?)@(\d+)', metric_name)
        if parsed is None:
            # Names without an '@k' suffix sort by the raw string.
            return (metric_name, 0)
        base, cutoff = parsed.groups()
        return (base, int(cutoff))

    return sorted(metrics, key=ordering)
def save_to_csv(results, output_file='log_summary.csv'):
    """Write parsed results to a CSV summary, one row per model.

    Columns: 'model', then valid_<metric> and test_<metric> for every
    metric that appears in any result, ordered by sort_metrics. Metrics
    missing from a particular model are left blank.
    """
    # Collect every metric that occurs in any result.
    all_metrics = set()
    for _, result in results:
        all_metrics.update(result['valid'])
        all_metrics.update(result['test'])
    sorted_metrics = sort_metrics(all_metrics)
    # Build headers.
    headers = (['model']
               + [f'valid_{m}' for m in sorted_metrics]
               + [f'test_{m}' for m in sorted_metrics])
    # BUG FIX: sort a copy — the original mutated the caller's list
    # in place via results.sort().
    ordered = sorted(results, key=lambda item: item[0].lower())
    # Write to CSV (explicit encoding so metric names round-trip
    # regardless of platform default).
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        for model_name, result in ordered:
            row = ([model_name]
                   + [result['valid'].get(m, '') for m in sorted_metrics]
                   + [result['test'].get(m, '') for m in sorted_metrics])
            writer.writerow(row)
# Script entry point: quarantine dead runs, then summarize the rest.
if __name__ == '__main__':
    log_dir = './log'  # Your log directory path

    # Step 1: move logs from runs that died before finishing.
    print("Checking for incomplete log files...")
    moved_files = move_incomplete_logs(log_dir)
    if not moved_files:
        print("No incomplete log files found.")
    else:
        print(f"Moved {len(moved_files)} incomplete log files to temp_log/")

    # Step 2: parse the remaining logs and write the CSV summary.
    print("\nParsing remaining log files...")
    results = parse_all_logs(log_dir)
    save_to_csv(results)
    print(f'Done. Parsed {len(results)} logs.')