-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrake.gs
More file actions
129 lines (106 loc) · 3.6 KB
/
rake.gs
File metadata and controls
129 lines (106 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// Browserify did not work for this project, so RAKE is re-implemented here using node-rake as a guideline.
// Much of this code, and all of the ideas, are copied from node-rake; this file only ports it to run as Google Apps Script (.gs).
/**
 * Runs the full RAKE pipeline on a piece of text and returns its phrases
 * ordered from highest to lowest score.
 *
 * @param {string} text - Text to extract key phrases from.
 * @returns {string[]} Candidate phrases as ordered by sortPhrases.
 */
function rake(text) {
  // Stage 1: break the text into sentence-like fragments, then into candidate phrases.
  const candidatePhrases = generatePhrases(splitTextToSentences(text));

  // Stage 2: score each word, then each phrase from its words.
  const scoresByWord = calculateKeywordScores(candidatePhrases);
  const scoresByPhrase = calculatePhraseScores(candidatePhrases, scoresByWord);

  // Stage 3: rank the phrases by score.
  return sortPhrases(scoresByPhrase);
}
/**
 * Runs the RAKE pipeline on a piece of text but stops before ranking,
 * returning the raw phrase-to-score mapping.
 *
 * @param {string} text - Text to extract key phrases from.
 * @returns {Object} The value produced by calculatePhraseScores
 *     (phrase keys mapped to their scores).
 */
function rakeScored(text) {
  const candidatePhrases = generatePhrases(splitTextToSentences(text));
  const scoresByWord = calculateKeywordScores(candidatePhrases);
  return calculatePhraseScores(candidatePhrases, scoresByWord);
}
/**
 * Scores several texts with rakeScored, sums the scores of phrases that occur
 * in more than one text, and returns the phrases ranked by their summed score.
 *
 * NOTE(review): only pair[0] (the text) is read. The function name suggests
 * pair[1] is meant to be a weight, but it is never applied - confirm the
 * intended behavior with the callers before changing it.
 *
 * @param {Array<Array>} pairs - Array of pairs; element 0 of each pair is the text to score.
 * @returns {string[]} Phrases as ordered by sortPhrases.
 */
function weightedrake(pairs) {
  const results = {};
  const hasOwn = Object.prototype.hasOwnProperty;
  for (let i = 0; i < pairs.length; i++) {
    const result = rakeScored(pairs[i][0]);
    for (const [key, value] of Object.entries(result)) {
      // Only accumulate onto totals stored by this function. A bare `results[key]`
      // truthiness test also sees inherited members (e.g. the phrase "constructor"
      // finds Object.prototype.constructor), which turned the total into a string.
      const previous = hasOwn.call(results, key) ? results[key] : undefined;
      results[key] = previous ? previous + value : value;
    }
  }
  return sortPhrases(results);
}
/**
 * Splits text into sentence-like fragments at any of the characters
 * . ! ? : \ ( ) , and drops fragments that are empty or made only of spaces.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} The non-blank fragments, untrimmed, in source order.
 *     Empty when the text contains nothing but delimiters (or is empty).
 */
function splitTextToSentences(text) {
  // String.prototype.match returns null (not an empty array) when nothing
  // matches, e.g. for '' or '...'; fall back to [] so the filter cannot throw.
  const sentences = text.match(/[^.!?:\\(),]+/g) || [];
  return sentences.filter((s) => s.replace(/ +/g, '') !== '');
}
/**
 * Cuts a sentence into fragments at every match of stopwordsTrueRegex.
 *
 * Each match is first replaced by a '|' marker and the sentence is then split
 * on that marker, so a literal '|' already present in the sentence also acts
 * as a split point.
 *
 * NOTE(review): the original author marked this function as "currently not
 * working"; stopwordsTrueRegex is defined outside this block - verify it.
 *
 * @param {string} sentence - Sentence fragment to split.
 * @returns {string[]} The pieces between stop-word matches (may include empty
 *     or whitespace-only strings).
 */
function removeStopWords(sentence) {
  const MARKER = '|';
  const marked = sentence.replace(stopwordsTrueRegex, MARKER);
  return marked.split(MARKER);
}
/**
 * Turns sentence fragments into a flat list of candidate phrases.
 *
 * Every sentence is split with removeStopWords; pieces that contain nothing
 * but punctuation and/or whitespace are discarded and the rest are trimmed.
 *
 * @param {string[]} sentenceList - Sentence fragments.
 * @returns {string[]} Non-empty, whitespace-trimmed candidate phrases in source order.
 */
function generatePhrases(sentenceList) {
  const reg = /['!"“”’#$%&()*+,\-./:;<=>?@[\\\]^_`{|}~']/g;
  const phraseList = [];
  sentenceList.forEach((sentence) => {
    removeStopWords(sentence).forEach((fragment) => {
      // Trim before testing for emptiness: comparing only against '' and ' '
      // let runs of two or more spaces through, which then became empty
      // phrases after trimming and broke phrase scoring downstream.
      if (fragment.replace(reg, '').trim() === '') {
        return;
      }
      // String.prototype.trim takes no arguments, so the characters the
      // original passed to it were ignored; only whitespace is removed.
      phraseList.push(fragment.trim());
    });
  });
  return phraseList;
}
/**
 * Computes the RAKE score of every token found in the phrases.
 *
 * A token is either one of the punctuation marks , . ! ? ; : / ‘ ’ “ ” or a
 * run of ASCII letters, digits and apostrophes. For each token:
 *   frequency = number of occurrences across all phrases
 *   degree    = sum of the token counts of the phrases it occurs in
 *               (counted once per occurrence, the token itself included)
 *   score     = degree / frequency
 *
 * @param {string[]} phraseList - Candidate phrases.
 * @returns {Object<string, number>} Token -> score. Tokens are case-sensitive keys.
 */
function calculateKeywordScores(phraseList) {
  // Maps instead of plain objects: with `{}` a token such as "constructor"
  // finds the inherited Object.prototype member, is treated as already
  // counted, and the arithmetic degrades to string concatenation / NaN.
  const wordFreq = new Map();
  const wordDegree = new Map();

  phraseList.forEach((phrase) => {
    const wordList = phrase.match(/[,.!?;:/‘’“”]|\b[0-9a-z']+\b/gi);
    if (!wordList) {
      return; // phrase has no scorable tokens
    }
    const wordListDegree = wordList.length;
    wordList.forEach((word) => {
      wordFreq.set(word, (wordFreq.get(word) || 0) + 1);
      wordDegree.set(word, (wordDegree.get(word) || 0) + wordListDegree);
    });
  });

  // The degree above already counts the token itself, so no further
  // adjustment is applied. The original follow-up pass iterated the frequency
  // *values* and used them as keys, which only corrupted the degree of numeric
  // tokens such as "1" and left every other score untouched.
  const wordScore = {};
  wordFreq.forEach((freq, word) => {
    wordScore[word] = wordDegree.get(word) / freq;
  });
  return wordScore;
}
/**
 * Scores each phrase as the sum of the scores of the words it contains.
 *
 * Words are runs of ASCII letters, digits and apostrophes - the same word
 * pattern calculateKeywordScores uses - so a hyphenated phrase such as
 * "well-known" is looked up as "well" + "known" instead of as one token that
 * has no score. Words without a finite numeric score contribute 0, and a
 * phrase with no words scores 0.
 *
 * @param {string[]} phraseList - Candidate phrases.
 * @param {Object<string, number>} wordScore - Word -> score lookup.
 * @returns {Object<string, number>} Phrase -> summed score.
 */
function calculatePhraseScores(phraseList, wordScore) {
  const phraseScores = {};
  phraseList.forEach((phrase) => {
    // match() returns null when the phrase has no words; treat that as "no words".
    const wordList = phrase.match(/\b[0-9a-z']+\b/gi) || [];
    let candidateScore = 0;
    wordList.forEach((word) => {
      const score = wordScore[word];
      // Skips missing entries and inherited non-numeric members so a single
      // unknown word cannot turn the whole phrase score into NaN.
      if (Number.isFinite(score)) {
        candidateScore += score;
      }
    });
    phraseScores[phrase] = candidateScore;
  });
  return phraseScores;
}
/**
 * Lists the keys of a score map ordered from highest to lowest score.
 *
 * @param {Object<string, number>} obj - Phrase -> score mapping.
 * @returns {string[]} The object's own enumerable keys, highest score first.
 */
function sortPhrases(obj) {
  const byScoreDescending = (first, second) => obj[second] - obj[first];
  const phrases = Object.keys(obj);
  phrases.sort(byScoreDescending);
  return phrases;
}