-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathreader.py
More file actions
50 lines (45 loc) · 1.4 KB
/
reader.py
File metadata and controls
50 lines (45 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import re
import os
# Split a mailing-list log into one text file per message.
#
# The log is read line by line.  A line of 30 or more "=" characters ends
# the current message, and the following "Date:" header names the next output
# file.  Quoted replies, blank lines and a handful of transport headers are
# dropped; remaining lines have any leading "Header:   " prefix removed before
# being written.

filepath = 'code4lib.log0405.txt'

# A run of at least 30 "=" at the start of a line separates two messages.
_DELIMITER = re.compile(r"^={30,}")

# Matches from a leading capital letter through the last colon that is
# followed by whitespace (".*" is greedy), i.e. a "Header-Name:   " prefix.
_HEADER_PREFIX = re.compile(r"^[A-Z].*:\s+")

# Header lines that are never copied to the output.
# NOTE: "Content-type" is matched without a trailing colon, as in the
# original script.
_SKIPPED_HEADERS = (
    "Reply-To:",
    "Sender:",
    "MIME-Version:",
    "Content-Type:",
    "Content-type",
    "Content-Transfer-Encoding:",
)


def _message_filename(date_line, count):
    """Build the output file name for a message from its "Date:" line.

    The header prefix is removed, surrounding whitespace (including the
    trailing newline, which previously ended up inside the file name) is
    stripped, commas are dropped, and spaces and colons become underscores.
    ``count`` is appended to keep names unique.
    """
    date = _HEADER_PREFIX.sub("", date_line).strip()
    date = date.replace(",", "")
    date = date.replace(" ", "_")
    date = date.replace(":", "_")
    return "{}_{}.txt".format(date, count)


def _is_skipped(line):
    """Return True for lines that must not be written to any output file.

    That covers quoted reply lines (starting with ">"), blank lines (with
    either LF or CRLF endings) and the headers in ``_SKIPPED_HEADERS``.
    """
    if line.startswith(">"):
        return True
    if not line.rstrip("\r\n"):
        return True
    return line.startswith(_SKIPPED_HEADERS)


def extract_messages(log_path, out_dir):
    """Write every message found in ``log_path`` to its own file in ``out_dir``.

    ``out_dir`` is created when missing.  Text that precedes the first
    delimiter goes to ``test.txt`` inside ``out_dir``.  Returns the number of
    "Date:" headers seen, which is the number of message files opened.
    """
    count = 0
    # Lines seen after a delimiter but before the next "Date:" header have no
    # output file yet; they are held here and flushed once the file is opened
    # (the original wrote them to an already-closed file and crashed).
    pending = []
    out = None
    with open(log_path) as log:
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        out = open(os.path.join(out_dir, "test.txt"), 'w')
        try:
            for line in log:
                if _DELIMITER.match(line):
                    # delimiter ======= ends the current message
                    if out is not None:
                        out.close()
                        out = None
                elif line.startswith("Date:"):
                    # date format for the file name
                    count += 1
                    if out is not None:
                        # A "Date:" line inside a message body would
                        # otherwise leak the previous handle.
                        out.close()
                        out = None
                    filename = _message_filename(line, count)
                    out = open(os.path.join(out_dir, filename), 'w')
                    print("Creating file: {}".format(filename))
                    out.writelines(pending)
                    del pending[:]
                elif _is_skipped(line):
                    continue
                else:
                    # stripping the header name from the standard email lines
                    text = _HEADER_PREFIX.sub("", line)
                    if out is None:
                        pending.append(text)
                    else:
                        out.write(text)
        finally:
            if out is not None:
                out.close()
    return count


def main():
    """Run the extraction on the default log file into ``extract/``."""
    extract_messages(filepath, "extract")


if __name__ == "__main__":
    main()