-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassParser.py
More file actions
86 lines (72 loc) · 2.26 KB
/
classParser.py
File metadata and controls
86 lines (72 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from html.parser import HTMLParser
from Course import Schedule
import json
# ---------------------------------------------------------------------------
# Module-level parser state.
#
# MyHTMLParser keeps its working state in these globals instead of on the
# parser instance; the rest of the script reads `data` once parsing is done.
# ---------------------------------------------------------------------------

# Most recently opened tag that is listed in validTags; reset to "" whenever
# any end tag is seen.
currentTag = ""
# Tags that may become the "current" tag.
validTags = ["table", "td", "tr", "th"]
# Cells of the most recent 3-cell row (taken in order: code, name, AU).
courseName = ""
courseCode = ""
courseAU = ""
# Non-blank <td> texts collected so far for the row being parsed.
currentData = []
# Maps a course code to the list of 7-element rows found under that course.
data = {}


class MyHTMLParser(HTMLParser):
    """Collect ``<td>`` text row by row and group the rows by course.

    Every ``</tr>`` closes a row. The number of non-blank cells collected
    for that row decides what happens to it:

    * 3 cells: a new course. The cells become ``courseCode``,
      ``courseName`` and ``courseAU`` and an empty list is registered in
      ``data`` under the course code.
    * 5 cells: a new entry for the same index. The row is padded with an
      empty string at both ends.
    * 6 cells: if the first cell is all digits it is treated as a new index
      and an empty string is appended; otherwise an empty string is
      prepended.
    * 7 cells (natively or after padding): appended to
      ``data[courseCode]``.

    Rows of any other length are dropped.
    """

    def handle_starttag(self, tag, attrs):
        """Remember ``tag`` as the current tag if it is one of ``validTags``."""
        global currentTag
        if tag in validTags:
            currentTag = tag

    def handle_endtag(self, tag):
        """Clear the current tag and, on ``</tr>``, file the finished row."""
        global currentTag, currentData
        global courseCode, courseName, courseAU

        # NOTE(review): this runs for *every* end tag, including elements
        # nested inside a cell, so text that follows a nested element's end
        # tag within the same <td> is not captured by handle_data.
        currentTag = ""
        if tag != "tr":
            return

        # Take ownership of the finished row and start a fresh one.
        row, currentData = currentData, []

        # if len is 3: new course
        if len(row) == 3:
            courseCode, courseName, courseAU = row
            data[courseCode] = []
            return

        if len(row) == 5:
            # if len is 5: new entry, same index -> pad both ends.
            row = [""] + row + [""]
        elif len(row) == 6:
            if row[0].isdigit():
                # if len is 6 and the first cell is numeric: new index,
                # pad the end.
                row = row + [""]
            else:
                # Otherwise pad the front instead.
                row = [""] + row

        # A 7-cell row seen before any course row has nothing to attach to;
        # skip it (like other unrecognised rows) instead of raising KeyError.
        if len(row) == 7 and courseCode in data:
            data[courseCode].append(row)

    def handle_data(self, data):
        """Append non-blank text found directly inside a ``<td>`` to the row.

        The text is stored exactly as received (it is not stripped). Note
        that the ``data`` parameter shadows the module-level ``data`` dict
        inside this method.
        """
        if currentTag == "td" and data.strip() != "":
            currentData.append(data)
# --- Step 1: parse the saved schedule page ---------------------------------
# Feeding the HTML to MyHTMLParser fills the module-level `data` dict
# (course code -> list of 7-element rows).
parser = MyHTMLParser()
with open("Class Schedule.html", "r", encoding='utf-8') as f:
    lines = f.readlines()
print(len(lines))
parser.feed("".join(lines))

# --- Step 2: flatten the rows into Schedule records ------------------------
# Each row is [index, type, group, day, time, venue, remark]. An empty index
# means "same index as the previous row of this course", so the last
# non-empty index is carried forward.
s = []
for key, value in data.items():
    currentIndex = ""
    for val in value:
        if val[0] != "":
            currentIndex = val[0]
        sch = Schedule()
        sch.code = key
        sch.index = currentIndex
        # Targets are assigned left to right, which keeps the attribute
        # creation order (and thus the order of keys in __dict__) stable.
        (sch.type, sch.group, sch.day,
         sch.time, sch.venue, sch.remark) = val[1:7]
        s.append(sch.__dict__)

# --- Step 3: write everything out as one JSON array ------------------------
schList = json.dumps(s)
with open("schedules.json", "w") as f:
    f.write(schList)
# Debug aid: print(data["CZ1007"]) shows the parsed rows for a single course.