forked from MadhuRam93/Song-Recommendation-System
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPredict.java
More file actions
284 lines (232 loc) · 9.26 KB
/
Predict.java
File metadata and controls
284 lines (232 loc) · 9.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
package org.myorg;
import java.io.IOException;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Enumeration;
import java.util.Vector;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.io.FileNotFoundException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
public class Predict extends Configured implements Tool {
/**
 * Command-line entry point. Delegates to Hadoop's ToolRunner, which invokes
 * {@link #run(String[])}, and ends the JVM with the status that call returns.
 *
 * @param args command-line arguments forwarded to ToolRunner
 * @throws Exception anything propagated by ToolRunner or by run()
 */
public static void main(String[] args) throws Exception {
    final int exitStatus = ToolRunner.run(new Predict(), args);
    System.exit(exitStatus);
}
/**
 * Runs the two chained MapReduce jobs: "Predict" followed by "Accuracy".
 *
 * Expected arguments:
 *   args[0] - input of the prediction job (handed to FileInputFormat.addInputPaths)
 *   args[1] - item-similarity file, exposed to the tasks as "Sim_Path"
 *   args[2] - test ratings file, exposed to the tasks as "test_Path"
 *   args[3] - output directory of the accuracy job
 *
 * The output directory and the working directory "intermediate_dir" are
 * removed recursively before the jobs start if they already exist.
 *
 * @param args command-line arguments as described above
 * @return 0 when both jobs succeed, 1 when either job fails, 2 when too few
 *         arguments were supplied
 * @throws Exception if a job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    if (args == null || args.length < 4) {
        System.err.println("Usage: Predict <input> <similarity file> <test file> <output>");
        return 2;
    }

    // Build on the configuration injected through Configured/ToolRunner so that
    // generic options are not lost; fall back to defaults if run() is called directly.
    Configuration base = getConf();
    if (base == null) {
        base = new Configuration();
    }
    Configuration conf = new Configuration(base);

    FileSystem hdfs = FileSystem.get(conf);
    String intermediate_dir = "intermediate_dir";
    Path output_path = new Path(args[3]);
    Path intermediate_path = new Path(intermediate_dir);
    try {
        if (hdfs.exists(output_path)) {
            hdfs.delete(output_path, true);
        }
        if (hdfs.exists(intermediate_path)) {
            hdfs.delete(intermediate_path, true);
        }
    } catch (IOException e) {
        // Cleanup stays best-effort: if a stale directory survives, the job
        // submission below reports the conflict.
        System.err.println("Warning: could not remove previous output directories: " + e);
    }

    // These must be set before Job.getInstance, which takes the configuration as it is now.
    conf.set("Sim_Path", args[1]);
    conf.set("test_Path", args[2]);

    Job job = Job.getInstance(conf, "Predict");
    job.setJarByClass(Predict.class);
    Path intermed1 = new Path(intermediate_path, "intermed1");
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, intermed1);
    job.setMapperClass(Map_Predict.class);
    job.setReducerClass(Reduce_Predict.class);
    // Mapper and reducer both emit Text/Text; declare it so the framework does not
    // fall back to its default key/value classes and reject the mapper output.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // A single reducer is required because the second job reads exactly part-r-00000.
    job.setNumReduceTasks(1);
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Job job2 = Job.getInstance(new Configuration(base), "Accuracy");
    job2.setJarByClass(Predict.class);
    Path intermed1_output = new Path(intermed1, "part-r-00000");
    FileInputFormat.addInputPath(job2, intermed1_output);
    FileOutputFormat.setOutputPath(job2, output_path);
    job2.setMapperClass(Map_Accuracy.class);
    job2.setReducerClass(Reduce_Accuracy.class);
    // Map_Accuracy emits Text/IntWritable while Reduce_Accuracy emits Text/DoubleWritable.
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    return job2.waitForCompletion(true) ? 0 : 1;
}
/* input: (UserId, [(itemId,Rating)])
* output: (UserId, ItemID, ActualRating, PredictedRating)
*/
//takes test_* file as input
/**
 * Mapper of the "Predict" job: re-keys every rating record by user.
 */
public static class Map_Predict extends Mapper<LongWritable, Text, Text, Text> {
    /**
     * Splits the line on whitespace and, when it holds exactly three fields
     * (user, song, rating), emits (user, "song,rating"). Any other line is ignored.
     *
     * @param offset   position of the line in the input (unused)
     * @param lineText one line of the job input
     * @param context  Hadoop context that receives the emitted pair
     */
    public void map(LongWritable offset, Text lineText, Context context)
            throws IOException, InterruptedException {
        final String[] fields = StringUtils.split(lineText.toString());
        if (fields.length != 3) {
            return;
        }
        final Text user = new Text(fields[0]);
        final Text songAndRating = new Text(fields[1] + "," + fields[2]);
        context.write(user, songAndRating);
    }
}
/**
 * Reducer of the "Predict" job.
 *
 * For each user it receives the "itemId,rating" pairs emitted by Map_Predict and,
 * for every test entry of that user, writes
 * (userId TAB itemId, actualRating TAB predictedRating).
 * The test ratings and the item similarities are side files whose paths are read
 * from the job configuration ("test_Path" and "Sim_Path") in setup().
 */
public static class Reduce_Predict extends Reducer<Text, Text, Text, Text> {
    /** smaller item id -> (larger item id -> similarity); each pair is stored once, under its smaller id. */
    private Map<Integer, Map<Integer, Float>> ISMap = new HashMap<Integer, Map<Integer, Float>>();
    /** user id -> list of "itemId,rating" strings loaded from the test file. */
    private Map<String, List<String>> testList = new HashMap<String, List<String>>();

    /**
     * Called once per task before any reduce() call: loads both side files into memory.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Use the task's own configuration so the side files are resolved with
        // the same settings the job was submitted with.
        Configuration confR = context.getConfiguration();
        loadTestData(confR.get("test_Path"), confR);
        loadSimilarityData(confR.get("Sim_Path"), confR);
    }

    /**
     * Predicts a rating for every test entry of one user.
     *
     * @param key     user id
     * @param values  "itemId,rating" pairs emitted by the mapper for this user
     * @param context receives one output record per test entry of the user
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        List<String> testEntries = testList.get(key.toString());
        if (testEntries == null) {
            // The user has no entry in the test file, so there is nothing to
            // predict; skipping avoids a NullPointerException on the lookup.
            return;
        }

        // itemId -> rating for everything this user rated in the job input.
        Map<Integer, Integer> userRatings = new HashMap<Integer, Integer>();
        for (Text value : values) {
            String[] itemAndRating = value.toString().split(",");
            userRatings.put(Integer.parseInt(itemAndRating[0].trim()),
                    Integer.parseInt(itemAndRating[1].trim()));
        }

        for (String entry : testEntries) {
            String[] itemAndRating = entry.split(",");
            String itemId = itemAndRating[0];
            String rating = itemAndRating[1];
            int predicted = Math.round(predictRating(Integer.parseInt(itemId.trim()), userRatings));
            Text outKey = new Text(key.toString() + "\t" + itemId);
            Text outValue = new Text(rating + "\t" + Integer.toString(predicted));
            context.write(outKey, outValue); // UID SongID ActualRating PredRating
        }
    }

    /**
     * Predicts the rating of itemId as the similarity-weighted average of the
     * user's known ratings.
     *
     * @param itemId   item whose rating is predicted
     * @param userlist itemId -> rating for the items the user has rated
     * @return the weighted average, or 0 when the summed similarity is not positive
     */
    private float predictRating(int itemId, Map<Integer, Integer> userlist) {
        float sumSR = 0, sumS = 0;
        for (Map.Entry<Integer, Integer> rated : userlist.entrySet()) {
            int ratedItem = rated.getKey();
            // Pairs are indexed by (smaller id, larger id).
            int smallerId = Math.min(ratedItem, itemId);
            int largerId = Math.max(ratedItem, itemId);
            Map<Integer, Float> row = ISMap.get(smallerId);
            Float similarity = (row == null) ? null : row.get(largerId);
            if (similarity == null) {
                continue; // no recorded similarity: the pair contributes nothing
            }
            sumSR += similarity * rated.getValue();
            sumS += similarity;
        }
        return (sumS > 0) ? sumSR / sumS : 0;
    }

    /**
     * Loads the test ratings into testList. Each line is expected to hold
     * userId, itemId and rating separated by tabs; shorter lines are skipped.
     */
    private void loadTestData(String testFilePath, Configuration conf) throws IOException {
        BufferedReader br = openReader(testFilePath, conf);
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] tokens = line.split("\t");
                if (tokens.length < 3) {
                    continue; // blank or malformed line
                }
                String userId = tokens[0];
                String SR = tokens[1] + "," + tokens[2];
                List<String> entries = testList.get(userId);
                if (entries == null) {
                    entries = new ArrayList<String>();
                    testList.put(userId, entries);
                }
                entries.add(SR);
            }
        } finally {
            br.close(); // also released when parsing fails
        }
    }

    /**
     * Loads the item similarities into ISMap. Each line is expected to look like
     * item1 TAB item2=sim,item3=sim,... ; lines or entries that do not have that
     * shape are skipped.
     */
    private void loadSimilarityData(String simFileName, Configuration conf) throws IOException {
        BufferedReader br = openReader(simFileName, conf);
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] tokens = line.split("\t");
                if (tokens.length < 2) {
                    continue; // blank line or item without a similarity list
                }
                int item1 = Integer.parseInt(tokens[0].trim());
                for (String entry : tokens[1].split(",")) {
                    String[] itemAndSim = entry.split("=");
                    if (itemAndSim.length < 2) {
                        continue;
                    }
                    int item2 = Integer.parseInt(itemAndSim[0].trim());
                    Float sim = Float.valueOf(itemAndSim[1]);
                    // Order each pair on its own; item1 must stay untouched so the
                    // remaining entries of the line are still paired with the right item.
                    int smaller = Math.min(item1, item2);
                    int larger = Math.max(item1, item2);
                    Map<Integer, Float> row = ISMap.get(smaller);
                    if (row == null) {
                        row = new HashMap<Integer, Float>();
                        ISMap.put(smaller, row);
                    }
                    row.put(larger, sim);
                }
            }
        } finally {
            br.close(); // also released when parsing fails
        }
    }

    /** Opens a text reader on the given path using the supplied configuration. */
    private BufferedReader openReader(String file, Configuration conf) throws IOException {
        Path pt = new Path(file);
        FileSystem fs = FileSystem.get(conf);
        return new BufferedReader(new InputStreamReader(fs.open(pt), "UTF-8"));
    }
}
public static class Map_Accuracy extends Mapper<LongWritable , Text , Text , IntWritable > {
public void map( LongWritable offset, Text lineText, Context context)
throws IOException, InterruptedException {
String line = lineText.toString();
String[] parts = StringUtils.split(line);
if(parts.length == 4){
String key = parts[0]+parts[1];
int Arating = Integer.parseInt(parts[2].trim());
int Prating = Integer.parseInt(parts[3].trim());
context.write(new Text("Error_Count"), new IntWritable(Math.abs(Arating - Prating)));
}
}
}
public static class Reduce_Accuracy extends Reducer<Text, IntWritable, Text, DoubleWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int total = 0;
double sum = 0;
for (IntWritable val : values) {
total++;
sum += val.get();
}
double accuracy = sum/total;
context.write(new Text("Accuracy : "), new DoubleWritable(accuracy));
}
}/*reduce ends*/
}