-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCodeForcesCategorizer.py
More file actions
158 lines (146 loc) · 5.16 KB
/
CodeForcesCategorizer.py
File metadata and controls
158 lines (146 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
__author__ = 'zihaozhu'
import urllib
from urllib.request import urlopen
from bs4 import BeautifulSoup as bs
from socket import timeout
import re
import sqlite3
import sys
import os
def insert(codeForces):
    """Insert scraped problems into the CODEFORCES table of codeForces.db.

    Each item of ``codeForces`` is read by index as
    ``(problem, title, status, tags)``; ``tags`` is an iterable of strings.
    The tags are flattened into the TYPE column as a single string in which
    every tag is followed by one space (so the value ends with a space when
    there is at least one tag, and is empty otherwise).

    All rows are committed together after the last INSERT. The connection is
    always closed, including when an INSERT raises; in that case nothing is
    committed.
    """
    conn = sqlite3.connect('codeForces.db')
    try:
        print("---Inserting data---")
        rows = [
            # None lets SQLite assign the AUTOINCREMENT id.
            (None, item[0], item[1], item[2],
             "".join(tag + " " for tag in item[3]))
            for item in codeForces
        ]
        conn.executemany("INSERT INTO CODEFORCES VALUES(?,?,?,?,?)", rows)
        conn.commit()
        print ("Record created successfully!")
    finally:
        conn.close()
#def userAuth():
#updates the db
def update(problems):
    """Set STATUS to 1 for the given problems in codeForces.db.

    ``problems`` is an iterable of values compared for equality against the
    PROBLEM column of the CODEFORCES table. Entries that match no row are
    silently ignored by the UPDATE.

    All updates are committed together. The connection is always closed,
    including when the UPDATE raises (e.g. the table does not exist); in that
    case nothing is committed.
    """
    conn = sqlite3.connect('codeForces.db')
    try:
        print("Attempting to update...")
        conn.executemany(
            "UPDATE CODEFORCES SET STATUS = ? WHERE PROBLEM=?",
            ((1, prob) for prob in problems),
        )
        conn.commit()
        print("Record updated successfully!")
    finally:
        conn.close()
#checks user submission for the problems they've solved
def accepted(name):
    """Return the set of problem ids for which ``name`` has an accepted submission.

    Opens http://www.codeforces.com/submissions/<name>/page/1 and follows the
    "→" link from page to page, collecting the first word of each problem
    link's text (e.g. the id part of "<id> - <title>") for submissions whose
    verdict reads "Accepted".

    Every page is parsed before the next-page link is looked up, so a user
    with a single page of submissions is handled and the final page is
    included in the result.

    If the first page cannot be opened, a message is printed and the process
    exits with status 0 (``SystemExit``). Errors while fetching later pages
    propagate to the caller.
    """
    site = "http://www.codeforces.com"
    first_page = "http://www.codeforces.com/submissions/" + name + "/page/1"
    # Raw string: "\d" and "\w" are regex classes, not Python string escapes.
    problem_href = re.compile(r'/problemset/problem/\d+/\w+')
    try:
        print(first_page)
        page = urlopen(first_page)
    # HTTPError subclasses URLError, so it must be tested first; otherwise
    # its handler can never run.
    except urllib.error.HTTPError:
        print("Something went wrong!")
        sys.exit(0)
    except urllib.error.URLError:
        print("User does not exist")
        sys.exit(0)
    except timeout:
        print("Time out!")
        sys.exit(0)
    with page:
        soup = bs(page.read(), "html.parser")

    solved = set()
    while True:
        solved.update(_accepted_on_page(soup, problem_href))
        next_links = soup.find_all('a', href=True, text="→")
        if not next_links:
            print("Reached the end")
            break
        next_path = next_links[0]['href']
        if not next_path:
            break
        with urlopen(site + next_path) as response:
            soup = bs(response.read(), "html.parser")
    return solved


def _accepted_on_page(soup, problem_href):
    """Return the problem ids with an "Accepted" verdict on one parsed page.

    ``soup`` is the parsed submissions page and ``problem_href`` a compiled
    pattern matching problem links. Returns an empty list when the page has
    no table of class ``status-frame-datatable``.
    """
    table = soup.find('table', {'class': 'status-frame-datatable'})
    if not table:
        return []
    solved = []
    # NOTE(review): assumes each submission is one <tr> that holds both the
    # problem link and the verdict <span> -- confirm against the live page.
    # Pairing inside the row avoids relying on the table-wide span list
    # lining up index-for-index with the list of problem links.
    for row in table.find_all('tr'):
        problem_link = row.find('a', href=problem_href)
        if problem_link is None:
            continue  # row without a problem link, e.g. a header row
        words = problem_link.text.split()
        if not words:
            continue
        if any(span.text == "Accepted" for span in row.find_all('span')):
            solved.append(words[0])
    return solved
def setUp():
    """Ensure the database table exists, crawl the problemset, return its tags.

    1. Opens ``codeForces.db`` in the current directory and creates the
       CODEFORCES table if it is missing.
    2. Fetches http://www.codeforces.com, follows its "Problemset" link and
       then the "→" link from page to page, reading every row of each
       ``table.problems``.

    Returns the set of distinct tag strings seen across all problems. If the
    "Problemset" link is not found, a message is printed and the returned set
    is empty.

    Every fetched page is parsed before the next-page link is looked up, so
    the final page is included. Rows that do not contain the expected cells
    or problem links are skipped.
    """
    # Final list of (number, title, status, tags) tuples gathered by the crawl.
    codeForces = []
    problemTypeSet = set()

    db_exists = os.path.isfile('codeForces.db')
    if db_exists:
        print("Connecting to database...")
    conn = sqlite3.connect('codeForces.db')
    try:
        # IF NOT EXISTS: the file may already exist without the table, since
        # sqlite3.connect() creates the file on first use.
        conn.execute('''CREATE TABLE IF NOT EXISTS CODEFORCES (
                        ID INTEGER PRIMARY KEY AUTOINCREMENT,
                        PROBLEM TEXT NOT NULL,
                        TITLE TEXT NOT NULL,
                        STATUS INT NOT NULL,
                        TYPE TEXT NOT NULL
                        );''')
        conn.commit()
        if not db_exists:
            print("Database created successfully.")
    finally:
        # The crawl below does not touch the database, so release it now.
        conn.close()

    link = "http://www.codeforces.com"
    # Raw string: "\d" and "\w" are regex classes, not Python string escapes.
    problem_href = re.compile(r'/problemset/problem/\d+/\w*')
    with urlopen(link) as page:
        soup = bs(page.read(), "html.parser")
    problemSet = soup.find_all('a', href=True, text="Problemset")
    if not problemSet:
        print("Link not found. Please check website")
        return problemTypeSet

    print("------Initiate crawling------")
    next_path = problemSet[0]['href']
    while next_path:
        with urlopen(link + next_path) as page:
            soup = bs(page.read(), "html.parser")
        codeForces.extend(_parse_problem_rows(soup, problem_href, problemTypeSet))
        arrows = soup.find_all('a', href=True, text="→")
        if not arrows:
            print("Reached the end")
            break
        next_path = arrows[0]['href']

    # NOTE(review): the rows in codeForces are collected but not persisted;
    # the insert(codeForces) call was already commented out in this file.
    # Confirm whether that is intentional before re-enabling it.
    #insert(codeForces)
    return problemTypeSet


def _parse_problem_rows(soup, problem_href, problemTypeSet):
    """Return ``(number, title, 0, tags)`` tuples for one problemset page.

    ``soup`` is the parsed page and ``problem_href`` a compiled pattern
    matching problem links. Every tag found is also added to
    ``problemTypeSet`` (mutated in place). Returns an empty list when the
    page has no table of class ``problems``.
    """
    table = soup.find('table', {'class': 'problems'})
    if not table:
        return []
    parsed = []
    for tr in table.find_all('tr'):
        cols = tr.find_all('td')
        if len(cols) < 2:
            continue  # header row (no <td>) or unexpected layout
        number_link = cols[0].find('a', href=problem_href)
        title_link = cols[1].find('a', href=problem_href)
        if number_link is None or title_link is None:
            continue
        # The tags are read from the second <div> of the title cell, when
        # that div is present.
        divs = cols[1].find_all('div')
        tags = []
        if len(divs) > 1:
            tags = [a.text for a in divs[1].find_all('a', {'class': 'notice'})]
        problemTypeSet.update(tags)
        parsed.append((number_link.text.strip(), title_link.text.strip(), 0, tags))
    return parsed
"""
def main():
setUp()
handle = input("Enter handle name: ")
acceptedProblem=accepted(handle)
update(acceptedProblem)
main()
"""