-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathInvertedIndex.java
More file actions
66 lines (53 loc) · 1.58 KB
/
InvertedIndex.java
File metadata and controls
66 lines (53 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import java.util.*;
/**
 * In-memory inverted index that maps
 * {@code word -> (documentId -> frequency of the word in that document)}.
 *
 * <p>The index is backed by plain {@link HashMap}s and performs no
 * synchronization of its own.
 */
public class InvertedIndex {

    /** word -> (documentId -> number of occurrences of the word in that document). */
    private final Map<String, Map<Integer, Integer>> index;

    /** Number of {@link #addDocument(Document)} calls that completed successfully. */
    private int totalDocuments;

    /** Creates an empty index containing no words and no documents. */
    public InvertedIndex() {
        index = new HashMap<>();
        totalDocuments = 0;
    }

    /**
     * Adds a document to the index.
     *
     * <p>The document content is split with {@code Tokenizer.tokenize}; every
     * returned token increments the frequency recorded for
     * (token, document id). Each successful call increments the document
     * count by one, so adding the same id twice counts it twice and
     * accumulates its frequencies.
     *
     * @param document the document to index; must not be {@code null}
     * @throws NullPointerException if {@code document} is {@code null}
     */
    public void addDocument(Document document) {
        Objects.requireNonNull(document, "document");

        // Read everything needed from the document before touching any state,
        // so a failure here leaves both the index and the counter unchanged.
        List<String> words = Tokenizer.tokenize(document.getContent());
        int docId = document.getId();

        for (String word : words) {
            // computeIfAbsent only allocates a posting map for a new word;
            // merge performs the "insert 1 or add 1" step in a single call.
            index.computeIfAbsent(word, key -> new HashMap<>())
                    .merge(docId, 1, Integer::sum);
        }

        // Counted last: only documents that were fully indexed are included.
        totalDocuments++;
    }

    /**
     * Returns map of documentId -> frequency for a given word.
     *
     * <p>The result is read-only. For an indexed word it is an unmodifiable
     * view of the index's own posting map, so it reflects documents added
     * later but cannot be used to alter the index.
     *
     * @param word the word to look up
     * @return an unmodifiable map of document id to frequency; empty if the
     *         word is not in the index
     */
    public Map<Integer, Integer> getDocuments(String word) {
        Map<Integer, Integer> postings = index.get(word);
        if (postings == null) {
            return Collections.emptyMap();
        }
        return Collections.unmodifiableMap(postings);
    }

    /**
     * Number of documents indexed.
     *
     * @return how many {@link #addDocument(Document)} calls have completed
     */
    public int getTotalDocuments() {
        return totalDocuments;
    }

    /**
     * Document Frequency (DF) of a word.
     *
     * @param word the word to look up
     * @return the number of distinct document ids recorded for the word, or
     *         {@code 0} if the word is not in the index
     */
    public int getDocumentFrequency(String word) {
        Map<Integer, Integer> postings = index.get(word);
        return postings == null ? 0 : postings.size();
    }

    /**
     * Check if word exists in index.
     *
     * @param word the word to look up
     * @return {@code true} if at least one indexed document produced this word
     */
    public boolean containsWord(String word) {
        return index.containsKey(word);
    }
}