forked from saimadhu-polamuri/CollaborativeFiltering
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollaborative_filtering.py
More file actions
117 lines (90 loc) · 4.36 KB
/
collaborative_filtering.py
File metadata and controls
117 lines (90 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
# Implementation of collaborative filtering recommendation engine
from recommendation_data import dataset
from math import sqrt
def similarity_score(person1, person2):
    """Return a Euclidean-distance based similarity between two users.

    Both arguments are keys into the module-level ``dataset`` mapping; each
    entry maps an item to that user's numeric rating. Only items rated by
    both users take part in the comparison.

    Returns:
        ``0`` when the two users have no rated item in common; otherwise
        ``1 / (1 + d)``, where ``d`` is the Euclidean distance between
        their ratings on the shared items (identical ratings give ``1.0``).

    Raises:
        KeyError: if either person is not a key of ``dataset``.
    """
    ratings1 = dataset[person1]
    ratings2 = dataset[person2]

    # Items rated by both users, collected in a single pass.
    shared_items = [item for item in ratings1 if item in ratings2]
    if not shared_items:
        return 0

    squared_distance = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared_items
    )
    # Adding 1 keeps the denominator non-zero when the distance is 0.
    return 1 / (1 + sqrt(squared_distance))
def pearson_correlation(person1, person2):
    """Return the Pearson correlation of two users' ratings.

    Both arguments are keys into the module-level ``dataset`` mapping; each
    entry maps an item to that user's numeric rating. The coefficient is
    computed only over the items both users have rated.

    Returns:
        ``0`` when the users share no rated item, or when either user's
        ratings on the shared items show no spread (the coefficient is
        undefined there); otherwise the correlation coefficient.

    Raises:
        KeyError: if either person is not a key of ``dataset``.
    """
    ratings1 = dataset[person1]
    ratings2 = dataset[person2]

    shared_items = [item for item in ratings1 if item in ratings2]
    number_of_ratings = len(shared_items)
    if number_of_ratings == 0:
        return 0

    # Plain sums of each user's ratings over the shared items.
    sum1 = sum(ratings1[item] for item in shared_items)
    sum2 = sum(ratings2[item] for item in shared_items)
    # Sums of squared ratings.
    square_sum1 = sum(ratings1[item] ** 2 for item in shared_items)
    square_sum2 = sum(ratings2[item] ** 2 for item in shared_items)
    # Sum of the per-item products of both users' ratings.
    product_sum = sum(ratings1[item] * ratings2[item] for item in shared_items)

    numerator = product_sum - (sum1 * sum2 / number_of_ratings)
    spread1 = square_sum1 - sum1 ** 2 / number_of_ratings
    spread2 = square_sum2 - sum2 ** 2 / number_of_ratings

    # Each spread is non-negative mathematically, but floating-point rounding
    # can leave a tiny negative value for (near-)constant ratings. Bail out
    # before sqrt() so a negative product cannot raise ValueError.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    denominator = sqrt(spread1 * spread2)
    if denominator == 0:
        # The product of two tiny positive spreads can underflow to zero.
        return 0
    return numerator / denominator
def most_similar_users(person, number_of_users):
    """Return the users most similar to ``person``.

    Every other key of the module-level ``dataset`` is scored against
    ``person`` with ``pearson_correlation``. The result is a list of
    ``(score, other_person)`` tuples ordered from highest to lowest,
    cut to the first ``number_of_users`` entries.
    """
    candidates = []
    for other_person in dataset:
        # Skip the person we are finding neighbours for.
        if other_person != person:
            score = pearson_correlation(person, other_person)
            candidates.append((score, other_person))

    # Highest score first.
    candidates.sort(reverse=True)
    return candidates[:number_of_users]
def user_reommendations(person):
    """Return recommended items for ``person``, best first.

    Each other user in the module-level ``dataset`` whose
    ``pearson_correlation`` with ``person`` is positive contributes their
    ratings, weighted by that similarity. Only items that ``person`` has not
    rated (or has rated 0) are considered. An item's predicted score is the
    similarity-weighted average of the contributing ratings.

    Returns:
        A list of items ordered by predicted score, highest first; the
        scores themselves are not returned.
    """
    weighted_totals = {}   # item -> sum of (rating * similarity)
    similarity_sums = {}   # item -> sum of similarities that contributed

    for other in dataset:
        # Don't compare the person with themselves.
        if other == person:
            continue

        sim = pearson_correlation(person, other)
        # Ignore users with zero or negative similarity.
        if sim <= 0:
            continue

        own_ratings = dataset[person]
        for item, rating in dataset[other].items():
            # Only score items the person has not rated yet (missing or 0).
            if own_ratings.get(item, 0) == 0:
                weighted_totals[item] = weighted_totals.get(item, 0) + rating * sim
                similarity_sums[item] = similarity_sums.get(item, 0) + sim

    # Normalise each total by its similarity sum to get the predicted score,
    # then order from highest to lowest prediction.
    rankings = sorted(
        ((total / similarity_sums[item], item)
         for item, total in weighted_totals.items()),
        reverse=True,
    )
    return [item for _score, item in rankings]
# Alternative demo (disabled): the three users most similar to 'Toby'.
# print(most_similar_users('Toby', 3))
# Demo: print the recommended items for 'Toby'. This is a bare top-level
# statement, so it executes whenever the module is loaded.
print(user_reommendations('Toby'))