-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathanalyzer.py
More file actions
executable file
·132 lines (107 loc) · 4.74 KB
/
analyzer.py
File metadata and controls
executable file
·132 lines (107 loc) · 4.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import numpy as N
import cPickle
import sqlite3
# Path of the sqlite3 database that holds the `results` table (used by Analyzer.test_from_sql).
SQLITE3_DB_PATH = 'results/results.sqlite3'
class Analyzer:
    """
    Measures how well a pair of pickled feature matrices predicts the ratings
    stored in a test dataset.

    A predicted rating is the dot product of a user's feature column and a
    movie's feature column; it counts as a match when it is less than one
    rating point away from the rating found in the test dataset.
    """

    def __init__(self, test_cases, sqlite_db):
        """
        Constructor.

        test_cases -- list of dicts naming the files to evaluate with
                      test_files(), or None when the files are taken from the
                      results database instead (see test_from_sql()). Example:

                      tests_cases = [
                          {
                              'test_dataset' : 'dataset/u1.test',
                              'user_features_file' : 'results/userFeatures_20-06-2010_17:11.txt',
                              'movie_features_file' : 'results/movieFeatures_20-06-2010_17:11.txt'
                          }
                      ]
        sqlite_db  -- path of the sqlite3 database holding the `results` table.
        """
        self.results_db = sqlite_db
        # Always define the attribute, so test_files() is a no-op instead of an
        # AttributeError when no explicit test cases were supplied.
        self.test_cases = test_cases if test_cases is not None else []

    def read_features(self, users_features_file, movie_feature_file):
        """
        Load the pickled user and movie feature matrices from the given files.

        Returns the tuple (userFeatures, moviesFeatures).

        NOTE(review): unpickling can execute code embedded in the file; only
        point this at feature files produced by a trusted training run.
        """
        # Pickles are binary data; `with` guarantees the handles are released
        # even when unpickling fails.
        with open(users_features_file, 'rb') as features:
            userFeatures = cPickle.load(features)
        with open(movie_feature_file, 'rb') as features:
            moviesFeatures = cPickle.load(features)
        return userFeatures, moviesFeatures

    def analyze(self, users_features_file, movie_feature_file, test_database):
        """
        Analyze a pair of (userFeatures, movieFeatures) against a test dataset
        in terms of prediction accuracy.

        test_database is a text file with one tab-separated record per line:
        user_id, movie_id, rating and (optionally) a date. Blank lines are
        skipped.

        Returns the tuple (total, correct): the number of ratings read and the
        number of those whose prediction is off by less than 1.
        """
        userFeatures, moviesFeatures = self.read_features(users_features_file, movie_feature_file)
        total = 0
        correct = 0
        with open(test_database, 'r') as dataset:
            # Stream the file instead of loading every line into memory.
            for line in dataset:
                line = line.rstrip()
                if not line:
                    continue
                # The trailing date field is not used by the analysis.
                user_id, movie_id, rating = line.split("\t", 3)[:3]
                user_id = int(user_id)
                movie_id = int(movie_id)
                rating = int(rating)
                # Both matrices are indexed [feature][id], so the prediction is
                # the dot product of the two id columns.
                predicted_rating = N.dot(userFeatures[:, user_id], moviesFeatures[:, movie_id])
                total += 1
                if abs(predicted_rating - rating) < 1:
                    correct += 1
        return (total, correct)

    def test_files(self):
        """
        Test each file triple given to the constructor via self.test_cases.
        """
        for test_case in self.test_cases:
            # 'test_dataset' is the documented key; 'test_database' (the key
            # this method used to read) is still accepted.
            if 'test_dataset' in test_case:
                dataset = test_case['test_dataset']
            else:
                dataset = test_case['test_database']
            self.test(test_case['user_features_file'], test_case['movie_features_file'], dataset)

    def test_from_sql(self):
        """
        Test each run recorded in the `results` table of the sqlite3 database
        and store the resulting match percentage back in its `match` column.

        The database given to the constructor is used; SQLITE3_DB_PATH is the
        fallback when that argument was None. The feature files of a run are
        expected at results/userFeatures_<date>.txt and
        results/movieFeatures_<date>.txt.
        """
        db_path = self.results_db if self.results_db is not None else SQLITE3_DB_PATH
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('select * from results')
            # Read every row before writing: committing an UPDATE while a
            # SELECT cursor on the same connection is still being iterated can
            # reset that cursor on older sqlite3 module versions.
            rows = cursor.fetchall()
            cursor.close()
            for row in rows:
                # date | algorithm | epochs | features | running_time | RMSE | training_dataset | test_dataset | match
                users_features_file = "results/userFeatures_" + row[0] + ".txt"
                movie_features_file = "results/movieFeatures_" + row[0] + ".txt"
                test_database = row[7]
                percent_match = self.test(users_features_file, movie_features_file, test_database)
                conn.execute('update results set match=? where date=?', (percent_match, row[0]))
                # Commit per row so finished runs stay recorded if a later one fails.
                conn.commit()
        finally:
            conn.close()

    def test(self, users_features_file, movie_features_file, test_database):
        """
        Run analyze() on one file triple, print a short report and return the
        percentage of matching predictions (0.0 when the dataset has no ratings).
        """
        print('')
        print("TEST: ")
        print("---------------------------------------------------")
        print("Users features: " + users_features_file)
        print("Movies features: " + movie_features_file)
        print("Test db: " + test_database)
        print("Analyzing results ... ")
        total, correct = self.analyze(users_features_file, movie_features_file, test_database)
        if total:
            percent_match = (100.0 * correct) / total
        else:
            # An empty dataset has nothing to match; avoid dividing by zero.
            percent_match = 0.0
        print("Match:  " + str(percent_match) + " %")
        return percent_match
# Default file triple evaluated by this script: the test dataset plus the
# pickled user/movie feature files of one training run.
tests_cases = [
    dict(
        test_dataset='dataset/u1.test',
        user_features_file='results/userFeatures_20-06-2010_17:11.txt',
        movie_features_file='results/movieFeatures_20-06-2010_17:11.txt',
    ),
]
if __name__ == "__main__":
    # The purpose of this utility is to analyze how efficient a predicted
    # model is for a test dataset. It uses the Analyzer class and evaluates
    # every run recorded in the sqlite3 results database.
    analyzer = Analyzer(tests_cases, "results/results.sqlite3")
    analyzer.test_from_sql()
    print("Done.")