-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatacleaning.py
More file actions
30 lines (25 loc) · 766 Bytes
/
datacleaning.py
File metadata and controls
30 lines (25 loc) · 766 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import csv
# Input columns (tab-separated, 0-based): 0 tconst, 1 titleType, 2 primaryTitle, 3 originalTitle, 4 isAdult, 5 startYear, 6 endYear, 7 runtimeMinutes, 8 genres
#with open("imdbout_title.tsv")
# Copy every "movie" row from the tab-separated title file that started in
# 2000 or later and runs at least 60 minutes into a CSV file, then report how
# many rows were kept.
#
# 0-based column positions used below:
#   1 = titleType, 5 = startYear, 7 = runtimeMinutes
TITLE_TYPE_COL = 1
START_YEAR_COL = 5
RUNTIME_COL = 7
MIN_START_YEAR = 2000
MIN_RUNTIME_MINUTES = 60

goodmovies = 0  # rows that passed every filter and were written to the CSV
badmovies = 0   # rows skipped because a needed column was missing or not an integer

# newline='' is what the csv module asks for on files it reads or writes;
# without it the writer produces blank lines between rows on platforms that
# translate '\n' to '\r\n'.
with open('imdbout_title.tsv', 'r', newline='') as tsvin, \
        open('new_out2.csv', 'w', newline='') as csvout:
    reader = csv.reader(tsvin, delimiter='\t')
    writer = csv.writer(csvout)
    for row in reader:
        # Only the field checks live inside the try: a short row raises
        # IndexError and a non-numeric year/runtime raises ValueError. Errors
        # from writing the output are deliberately left to propagate.
        try:
            if row[TITLE_TYPE_COL] != "movie":
                continue
            keep = (int(row[START_YEAR_COL]) >= MIN_START_YEAR
                    and int(row[RUNTIME_COL]) >= MIN_RUNTIME_MINUTES)
        except (IndexError, ValueError):
            badmovies += 1
            continue
        if keep:
            # TODO: get finance info (carried over from the original note)
            # Count a row exactly once, at the point it is actually written,
            # so the printed total equals the number of rows in the output.
            goodmovies += 1
            writer.writerow(row)

print("good moves:")
print(goodmovies)