-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathClustering.java
More file actions
117 lines (87 loc) · 2.58 KB
/
Clustering.java
File metadata and controls
117 lines (87 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
/**
 * Reads a fixed set of news article files, part-of-speech tags each one, prints the tagged
 * text and finally passes all loaded articles to {@link #cluster(ArrayList)}.
 *
 * <p>Set up a folder called {@code news} and run this program from the directory above it.
 * The files are expected to be named {@code <n><v>.txt} with {@code n} in 1..20 and
 * {@code v} in a..e (for example {@code news/7c.txt}).
 */
public class Clustering {

    /** Model handed to the {@link MaxentTagger} constructor. */
    private static final String TAGGER_MODEL = "models/wsj-0-18-bidirectional-nodistsim.tagger";

    /** Suffixes appended to the numeric part of an article id. */
    private static final String[] VERSIONS = {"a", "b", "c", "d", "e"};

    /** Highest numeric article id; ids start at 1. */
    private static final int LAST_ARTICLE_NUMBER = 20;

    /** Number of leading lines of every file that are not part of the article text. */
    private static final int SKIPPED_LINES = 2;

    /** Tagger shared by all {@link #Tag(String)} calls; created on first use. */
    private static MaxentTagger tagger;

    /**
     * Loads every article, prints its tagged form and then clusters the whole collection.
     *
     * <p>If any file cannot be read the stack trace is printed, loading stops and
     * {@link #cluster(ArrayList)} is not called.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ArrayList<Article> news = new ArrayList<Article>();
        try {
            for (int i = 1; i <= LAST_ARTICLE_NUMBER; i++) {
                for (String version : VERSIONS) {
                    String id = "" + i + version;
                    // Forward slashes are accepted by java.io on Windows as well as on
                    // Unix-like systems, unlike the backslash form which only works on Windows.
                    String file = "./news/" + id + ".txt";
                    String newstext = readArticleText(file);
                    System.out.println(Tag(newstext));
                    news.add(new Article(id, newstext));
                }
            }
            cluster(news);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads one article file and returns its text without the first {@value #SKIPPED_LINES}
     * lines. The reader is always closed before this method returns, so no file handle
     * outlives the call.
     *
     * @param file path of the file to read
     * @return the remaining lines joined together
     * @throws IOException if the file cannot be opened or read
     */
    private static String readArticleText(String file) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                if (lineNumber >= SKIPPED_LINES) {
                    // NOTE(review): lines are joined without any separator, so the last word
                    // of one line touches the first word of the next - confirm this is intended.
                    text.append(line);
                }
                lineNumber++;
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Prints the number of articles followed by the id and text of each one.
     *
     * @param news the articles to report on
     */
    public static void cluster(ArrayList<Article> news) {
        System.out.println(news.size());
        for (Article article : news) {
            System.out.println(article.id);
            System.out.println(article.text);
        }
    }

    /**
     * Tokenizes {@code input} into sentences, tags each sentence and returns the tagged
     * sentences joined together with no separator between them.
     *
     * @param input the text to tag
     * @return the string form of every tagged sentence, in order
     */
    private static String Tag(String input) {
        MaxentTagger model = sharedTagger();
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(input));
        StringBuilder tagged = new StringBuilder();
        for (List<HasWord> sentence : sentences) {
            ArrayList<TaggedWord> tSentence = model.tagSentence(sentence);
            tagged.append(Sentence.listToString(tSentence, false));
        }
        return tagged.toString();
    }

    /**
     * Returns the shared tagger, constructing it from {@link #TAGGER_MODEL} on first use so
     * the model is not loaded again for every article.
     *
     * @return the tagger instance used by {@link #Tag(String)}
     */
    private static synchronized MaxentTagger sharedTagger() {
        if (tagger == null) {
            tagger = new MaxentTagger(TAGGER_MODEL);
        }
        return tagger;
    }
}
class Article{
public String id="";
public String text="";
Article(String id, String text){
this.id = id;
this.text = text;
}
}