-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcode.py
More file actions
142 lines (122 loc) · 4.35 KB
/
code.py
File metadata and controls
142 lines (122 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import time
import numpy as np
import mv100
# Input files for the train / test split (absolute paths on the author's machine).
train_path = "/Users/jas0n/PycharmProjects/svd++/ml-100k/u2.base"
test_path = "/Users/jas0n/PycharmProjects/svd++/ml-100k/u2.test"

# NOTE(review): mv100 is a project-local module not visible here. Presumably
# mv1002list parses a ratings file into rows of (user id, item id, score, ...)
# and creat_matrix builds a user x item matrix with -1 for "no rating" --
# confirm against mv100.
train_list = mv100.mv1002list(train_path)
train_length = len(train_list)  # number of training rows, used for progress output
rm = mv100.creat_matrix(train_list)
test_list = mv100.mv1002list(test_path)
print(rm.shape)
def Rudic(ratings=None):  # return the diction of rated item for user U
    """Map every user (row index) to the list of item indices they have rated.

    An entry counts as rated when its value differs from -1, the marker the
    rest of this file uses for "no rating".

    Args:
        ratings: optional 2-D rating matrix (rows = users, columns = items).
            When omitted, the module-level matrix ``rm`` is used, which is
            what the original zero-argument call did.

    Returns:
        dict mapping each row index to a list of column indices (plain Python
        ints, ascending). Users without any rating map to an empty list.
    """
    matrix = np.asarray(rm if ratings is None else ratings)
    # One vectorised scan per row replaces the element-by-element Python loop.
    return {
        user: np.flatnonzero(row != -1).tolist()
        for user, row in enumerate(matrix)
    }
def sigmaYj(U):
    """Sum the implicit item-factor columns of ``y`` over the items user U rated.

    Args:
        U: user index, used as a key into the module-level ``Rudic`` mapping.

    Returns:
        A new ``(k, 1)`` array holding the column-wise sum of ``y[:, j]`` for
        every item ``j`` recorded for the user; all zeros when the user has no
        recorded items.
    """
    rated = Rudic.get(U)
    if rated is None or len(rated) == 0:
        # Empty sum: nothing to accumulate for this user.
        return np.zeros((k, 1))
    # Gather the user's columns once and reduce them in a single NumPy call
    # instead of adding them one at a time in Python.
    return y[:, rated].sum(axis=1).reshape(k, 1)
def getU(ratings=None):  # get average rating
    """Return the mean of all observed ratings.

    Entries equal to -1 are treated as "no rating" and ignored.

    Args:
        ratings: optional 2-D rating matrix. When omitted, the module-level
            matrix ``rm`` is used, which is what the original zero-argument
            call did.

    Returns:
        The arithmetic mean of every entry that is not -1.

    Raises:
        ValueError: if the matrix holds no observed rating at all (the
            original failed with a bare division by zero in that case).
    """
    matrix = np.asarray(rm if ratings is None else ratings)
    observed = matrix[matrix != -1]  # boolean mask keeps only real ratings
    if observed.size == 0:
        raise ValueError("rating matrix contains no observed ratings")
    return observed.sum() / observed.size
def RMSE(test):  # get rmse loss
    """Root-mean-square error of ``Eui`` over the rows of ``test``.

    Each row is read as (user id, item id, score, ...); both ids are shifted
    down by one before being used as indices.

    Args:
        test: iterable of rating rows.

    Returns:
        Square root of the mean squared difference between each row's score
        and the model's estimate.
    """
    squared_error = 0
    count = 0
    for row in test:
        user = int(row[0]) - 1
        item = int(row[1]) - 1
        residual = row[2] - Eui(user, item)
        squared_error += residual ** 2
        count += 1
    return (squared_error / count) ** 0.5
def MAE(test):  # get mae loss
    """Mean absolute error of ``Eui`` over the rows of ``test``.

    Each row is read as (user id, item id, score, ...); both ids are shifted
    down by one before being used as indices.

    Args:
        test: iterable of rating rows.

    Returns:
        Mean of the absolute difference between each row's score and the
        model's estimate.
    """
    absolute_error = 0
    count = 0
    for row in test:
        user = int(row[0]) - 1
        item = int(row[1]) - 1
        absolute_error += abs(row[2] - Eui(user, item))
        count += 1
    return absolute_error / count
def Eui(u, i):  # return the estimate value of the specific user u on item i
    """Estimate the rating of user ``u`` for item ``i``.

    The estimate is
    ``average + b_i[i] + b_u[u] + q[i] . (p[:, u] + |R(u)|^-0.5 * sigmaYj(u))``
    where ``R(u)`` is the list of items stored for the user in ``Rudic``.

    Args:
        u: user index (0-based).
        i: item index (0-based).

    Returns:
        The scalar estimate.
    """
    rated = Rudic.get(u)
    n_rated = len(rated) if rated is not None else 0

    user_factors = p[:, u].reshape(k, 1)
    if n_rated:
        # Only add the implicit-feedback term when the user has rated items.
        # With none, 1 / 0 ** 0.5 would raise ZeroDivisionError and the sum
        # over an empty item set is zero anyway.
        user_factors = user_factors + (1 / n_rated ** 0.5 * sigmaYj(u))

    interaction = q[i].reshape(1, k).dot(user_factors)  # (1, 1) array
    return average + b_i[i] + b_u[u] + interaction[0][0]
m = rm.shape[0]  # numbers of users
n = rm.shape[1]  # numbers of items
k = 50  # the length of p & q

# The latent factors must not all start at zero. In the training loop the
# gradient of q is proportional to (p + implicit term), the gradient of p to q
# and the gradient of y to q, so with p, q and y all zero every factor update
# is exactly zero and only the bias terms would ever be learned.
# Small seeded Gaussian values break that fixed point and keep runs repeatable.
_factor_rng = np.random.RandomState(0)
p = _factor_rng.normal(0.0, 0.1, size=(k, m))  # matrix of user preference
q = _factor_rng.normal(0.0, 0.1, size=(n, k))  # matrix of item quality
b_i = [0] * n  # item bias
b_u = [0] * m  # user bias
y = np.zeros((k, n))  # implicit item factors; these move once q is non-zero

epochs = 30  # total epochs
lr = 0.007  # learning rate
decay = 0.9  # factor applied to the learning rate during training
l1 = 0.005  # regularization parameter1 (used for the bias terms)
l2 = 0.015  # regularization parameter2 (used for p, q and y)

rmse_result_list = []  # one entry per epoch
mae_result_list = []  # one entry per epoch

average = getU()  # mean of the observed training ratings
# NOTE: this rebinds the name Rudic from the builder function to the dict it
# returns; every later use in the file (Rudic.get(...)) relies on the dict.
Rudic = Rudic()

times = 0  # total number of training steps performed so far
total_trainning_time = 0
total_test_time = 0

# Baseline error before any training step.
rmse = RMSE(test_list)
mae = MAE(test_list)
print("after initializing,\tthe rmse loss is {},\tthe mae loss is {}".format( rmse, mae))
# Stochastic gradient descent over the training rows, one pass per epoch.
for epoch in range(0, epochs):
    current_epoch_times = 0
    for line in train_list:
        step_start = time.time()

        # Every row is (user id, item id, score, ...); ids are shifted down by
        # one to index the parameter arrays.
        u_id = int(line[0]) - 1
        i_id = int(line[1]) - 1
        score = line[2]

        rated = Rudic.get(u_id)
        root_n = len(rated) ** 0.5  # same for every update of this step

        err = score - Eui(u_id, i_id)  # prediction error for this rating

        b_u[u_id] = b_u[u_id] + lr * (err - l1 * b_u[u_id])
        b_i[i_id] = b_i[i_id] + lr * (err - l1 * b_i[i_id])

        # NOTE: the updates are sequential -- p and y below use the item
        # factors *after* this step's q update, as the original code did.
        implicit = (1 / root_n) * sigmaYj(u_id).reshape(k)
        q[i_id] = q[i_id] + lr * (err * (p[:, u_id] + implicit) - l2 * q[i_id])
        p[:, u_id] = p[:, u_id] + lr * (err * q[i_id] - l2 * p[:, u_id])

        # All implicit factors of the user's rated items move by the same
        # rule, so update those columns in one fancy-indexed assignment.
        y[:, rated] = y[:, rated] + lr * (
            (err / root_n) * q[i_id].reshape(k, 1) - l2 * y[:, rated])

        step_end = time.time()
        times += 1
        current_epoch_times += 1
        total_trainning_time += (step_end - step_start)
        if current_epoch_times % 1000 == 0:
            print(
                "has step {} times,\t{}%,\testimated epoch time is {} s".format(current_epoch_times, round(current_epoch_times / train_length * 100,2),
                                                                                     total_trainning_time / times * train_length))

    lr = lr * decay  # shrink the learning rate once per epoch

    # Time the evaluation itself: the timer has to start before RMSE/MAE run,
    # otherwise total_test_time only measures the list appends.
    test_start = time.time()
    rmse = RMSE(test_list)
    mae = MAE(test_list)
    test_end = time.time()
    total_test_time += (test_end - test_start)

    print("this is the {} epoch,\tthe rmse loss is {},\tthe mae loss is {}".format(epoch + 1, rmse,mae))
    rmse_result_list.append(rmse)
    mae_result_list.append(mae)

print("rmse",rmse_result_list)
print("=======================================")
print("mae",mae_result_list)
print("the total_time used in trainning is {}, the total test time is {}".format(total_trainning_time,total_test_time))