forked from CrazyBurrito/DataMiningProject
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtoFiles.py
More file actions
executable file
·42 lines (37 loc) · 1.35 KB
/
toFiles.py
File metadata and controls
executable file
·42 lines (37 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# toFiles.py
# Author: Dimitrios Economou
#
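# Writes each comment found in the "_*" dump files to its own text file,
# grouped into <subreddit>/<submission id>/ directories. Probably won't use this.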
import sys
import json
import codecs
import glob
import os
import errno
def makePath(path):
    """Create directory ``path`` (and any missing parents) if needed.

    An already-existing directory is not an error, so the function can be
    called repeatedly with the same path.

    Args:
        path: directory path to create.

    Raises:
        OSError: if creation fails for any reason other than the directory
            already existing -- including when ``path`` exists but is not a
            directory.
    """
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is also reported when a regular file occupies the path; only
        # an existing *directory* is acceptable, so re-raise otherwise instead
        # of letting the caller fail later with a more confusing error.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def main(argv):
    """Write every matching comment from the ``_*`` dump files to its own file.

    Reads the subreddit names of interest from ``subreddits.list`` (one per
    line, matched case-insensitively) and prints how many there are.  Then
    every file in the current directory whose name starts with ``_`` is read
    line by line; each non-blank line is parsed as a JSON object.  Comments
    whose ``subreddit`` is in the list are written, UTF-8 encoded, to
    ``<subreddit>/<link_id>/<name>-<parent_id>``.

    Args:
        argv: command-line arguments; currently unused.
    """
    # get the set of subreddits we are interested in; surrounding whitespace
    # (e.g. "\r" from CRLF files) is dropped and blank lines are ignored so
    # they do not add a bogus empty name to the set
    with open("subreddits.list") as subredditsFile:
        subreddits = {
            line.strip().lower() for line in subredditsFile if line.strip()
        }
    # the single-argument call form prints the same thing on Python 2 and 3
    print(len(subreddits))

    for dumpPath in glob.glob("_*"):
        # the pattern can also match directories, which cannot be opened
        if not os.path.isfile(dumpPath):
            continue
        with open(dumpPath) as dumpFile:
            for line in dumpFile:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                if not line.strip():
                    continue
                decodedComment = json.loads(line)
                subreddit = decodedComment["subreddit"]
                if subreddit.lower() not in subreddits:
                    continue
                commentId = decodedComment["name"]
                parentId = decodedComment["parent_id"]
                submissionId = decodedComment["link_id"]
                # one directory per submission, one file per comment
                directory = "{0}/{1}".format(subreddit, submissionId)
                makePath(directory)
                commentPath = "{0}/{1}-{2}".format(directory, commentId, parentId)
                with codecs.open(commentPath, 'w', encoding='utf-8') as commentTxt:
                    commentTxt.write(decodedComment["body"])
# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cliArguments = sys.argv[1:]
    main(cliArguments)