forked from MadhuRam93/Song-Recommendation-System
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPreprocess.java
More file actions
61 lines (46 loc) · 1.88 KB
/
Preprocess.java
File metadata and controls
61 lines (46 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package org.myorg;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
/**
 * Preprocessing MapReduce stage: regroups raw rating records by user.
 *
 * <p>Each input line is expected to hold three whitespace-separated tokens,
 * {@code userID songID rating}. The mapper re-keys every record by user and the
 * reducer concatenates all {@code songID=rating} entries of one user into a
 * single comma-separated value.
 */
public class Preprocess {

    /**
     * Mapper function 1.
     *
     * <p>Input: the offset and the content ({@code lineText}) of every line in the input file.
     * <br>Output: {@code <userID, "songID=rating">} pairs.
     *
     * <p>Lines that do not split into exactly three tokens produce no output; they are
     * tallied in the {@code Preprocess / SKIPPED_LINES} job counter so dropped input is visible.
     */
    public static class Map_Preprocess extends Mapper<LongWritable, Text, Text, Text> {

        /** Counter group under which this mapper reports its statistics. */
        private static final String COUNTER_GROUP = "Preprocess";

        /** Counter name for input lines that did not contain exactly three tokens. */
        private static final String SKIPPED_LINES = "SKIPPED_LINES";

        /**
         * Parses one input line and emits it keyed by user.
         *
         * @param offset   position of the line in the input (unused)
         * @param lineText raw text of the line
         * @param context  task context that receives the output pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable offset, Text lineText, Context context)
                throws IOException, InterruptedException {
            String[] parts = StringUtils.split(lineText.toString());

            // Anything other than "userID songID rating" (including blank lines) is skipped,
            // but counted rather than dropped without a trace.
            if (parts.length != 3) {
                context.getCounter(COUNTER_GROUP, SKIPPED_LINES).increment(1);
                return;
            }

            String userID = parts[0];
            String songID = parts[1];
            String rating = parts[2];
            context.write(new Text(userID), new Text(songID + "=" + rating));
        }
    }

    /**
     * Reducer function 1.
     *
     * <p>Input: the mapper output grouped by key, i.e. {@code <userID, [songID=rating, ...]>}.
     * <br>Output: one {@code <userID, ",songID=rating,songID=rating,...">} pair per user.
     */
    public static class Reduce_Preprocess extends Reducer<Text, Text, Text, Text> {

        /**
         * Joins all song ratings of one user into a single value.
         *
         * @param userID  the user whose ratings are being collected
         * @param values  the {@code songID=rating} entries emitted for this user
         * @param context task context that receives the output pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text userID, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder keeps the join linear in the total output size; repeated
            // String concatenation would re-copy the accumulated text for every value.
            StringBuilder songRatings = new StringBuilder();
            for (Text val : values) {
                // The separator is written BEFORE each entry, so the result starts with ','.
                // This matches the format this stage has always produced; it is kept so that
                // existing consumers of the output keep parsing it the same way.
                songRatings.append(',').append(val.toString());
            }
            context.write(userID, new Text(songRatings.toString()));
        }
    }
}