-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy patherrorReport.py
More file actions
105 lines (89 loc) · 4.19 KB
/
errorReport.py
File metadata and controls
105 lines (89 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# coding: utf-8
"""Summarise batch-job outcomes per sample from the logs under condor_logs/.

For every entry of ``fileList`` (provided by ``listFiles``) the script looks at
``condor_logs/<sample>/*.log``, oldest modification time first, counts the
lines of each log that contain a set of marker phrases, and prints per-log and
per-sample completion figures.

The encoding declaration is the very first line because some CPython releases
ignore a declaration on line 2 when line 1 contains code, and this file holds
a non-ASCII character.

Every ``print`` call receives a single pre-built string, so it produces the
same output as a Python 2 ``print`` statement and as the Python 3 function.
"""
import glob
import os
import re          # no longer used below; kept so the file's imports are unchanged
import subprocess  # no longer used below; kept so the file's imports are unchanged

# Create and import the lists of samples to run
print('Creating and importing file lists...')
from listFiles import *

# Working directory at launch (not used further in this script).
runDir = os.getcwd()

# Marker phrases searched for in each log, keyed by the counter they feed.
# Matching is a plain substring test per line, which is what the previous
# `grep "<phrase>" <log> | wc` pipelines computed for these literal phrases.
_PATTERNS = (
    ('submitted', b'Job submitted'),
    ('over_memory', b'exceeding requested memory'),
    ('held', b'Job was held'),
    ('over_time', b'more than 2 days'),
    ('over_disk', b'disk'),
    ('exit_one', b'(return value 1)'),
    ('abnormal', b'Abnormal termination'),
    ('exit_zero', b'(return value 0)'),
)


def _emit(*parts):
    """Print *parts* separated by single spaces, like ``print a, b, c``."""
    print(' '.join(str(part) for part in parts))


def _percent(part, whole):
    """Return 100 * part / whole rounded to 2 decimals; 0.0 when whole is 0."""
    if not whole:
        # A log without any "Job submitted" line used to raise
        # ZeroDivisionError here.
        return 0.0
    return round(100 * float(part) / float(whole), 2)


def _list_logs(sample):
    """Return the sample's ``*.log`` paths, oldest modification time first.

    Replaces ``ls -tr condor_logs/<sample>/*.log``.  An empty directory now
    yields an empty list instead of one empty path.  Files with identical
    modification times are ordered by name, which may differ from ``ls``.
    """
    paths = glob.glob(os.path.join('condor_logs', sample, '*.log'))
    return sorted(paths, key=lambda path: (os.path.getmtime(path), path))


def _count_matches(log_path):
    """Return ``{key: number of lines of log_path containing the phrase}``.

    The file is read once, in binary mode, so undecodable bytes cannot raise.
    """
    counts = dict((key, 0) for key, _phrase in _PATTERNS)
    with open(log_path, 'rb') as handle:
        for line in handle:
            for key, phrase in _PATTERNS:
                if phrase in line:
                    counts[key] += 1
    return counts


for sample in fileList:
    sample = sample.replace('fileLists/', '').replace('.txt', '')
    print('\n=============================================')
    _emit('Working on sample', sample)

    logs = _list_logs(sample)
    _emit('Sample has', len(logs), 'log files')

    totaljobs = 0    # largest "Job submitted" count found in a single log
    totaldone = 0    # successful jobs summed over the logs processed so far
    total_files = 0
    for thislog in logs:
        _emit('\tWorking on this log:', thislog)
        try:
            counts = _count_matches(thislog)
        except (IOError, OSError) as exc:
            # The log vanished or is unreadable: report it and move on.
            _emit('\t\tCould not read log, skipping:', exc)
            continue

        jobs = counts['submitted']
        if totaljobs < jobs:
            totaljobs = jobs
        # all done before last plus last time job number
        total_files = totaldone + jobs

        omem = counts['over_memory']
        _emit('\t\tOver memory (killed) = ', omem, '/', jobs)

        omemdocker = counts['held']
        _emit('\t\tOver memory (held) = ', omemdocker, '/', jobs,
              '(might not be unique from over memory killed!)')

        otime = counts['over_time']
        _emit('\t\tOver walltime (killed) = ', otime, '/', jobs)

        odisk = counts['over_disk']
        _emit('\t\tOver disk (killed) = ', odisk, '/', jobs)

        crash = counts['exit_one'] + counts['abnormal']
        _emit('\t\tFile read error (crashed) = ', crash, '/', jobs)

        done = counts['exit_zero']
        totaldone += done
        _emit('\t\tDONE = ', done, '/', jobs, '=', _percent(done, jobs),
              '%, TOTAL DONE =', _percent(totaldone, totaljobs), '%')
        _emit('\t\tUnknown (running?) =',
              max(0, jobs - done - crash - odisk - omemdocker - omem - otime),
              '/', jobs)

    _emit('\n Total files done (correct calculation in total input) = ',
          totaldone, '/', total_files, ' ≈',
          _percent(totaldone, total_files), '%')