-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSearchEngine.java
More file actions
110 lines (82 loc) · 3.17 KB
/
SearchEngine.java
File metadata and controls
110 lines (82 loc) · 3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import java.util.*;
/**
 * Handles search queries and ranks documents.
 *
 * <p>A query is split into terms with {@code Tokenizer.tokenize}. For every term the
 * documents listed by the {@link InvertedIndex} are scored with a
 * {@link TFIDFCalculator} built over that same index.
 */
public class SearchEngine {

    /**
     * Orders (docId, score) entries by descending score. Equal scores are ordered by
     * ascending document id so that rankings do not depend on map iteration order.
     */
    private static final Comparator<Map.Entry<Integer, Double>> BY_SCORE_DESC =
            (a, b) -> {
                int byScore = Double.compare(b.getValue(), a.getValue());
                return byScore != 0 ? byScore : Integer.compare(a.getKey(), b.getKey());
            };

    private final InvertedIndex index;
    private final TFIDFCalculator tfidf;

    /**
     * Creates a search engine over the given index.
     *
     * @param index inverted index used both for term lookups and for TF-IDF scoring
     */
    public SearchEngine(InvertedIndex index) {
        this.index = index;
        this.tfidf = new TFIDFCalculator(index);
    }

    /**
     * Returns TF-IDF scores for each document for a given query.
     *
     * <p>A document's score is the sum of its per-term TF-IDF values over all query
     * terms (a term that occurs twice in the query is counted twice). Only documents
     * listed by the index for at least one query term appear in the result.
     *
     * @param query raw query text; it is tokenized before scoring
     * @return a new mutable map from document id to accumulated score
     */
    public Map<Integer, Double> getDocumentScores(String query) {
        return scoreDocuments(query);
    }

    /**
     * Searches query and returns ranked document IDs.
     *
     * @param query raw query text; it is tokenized before scoring
     * @return document ids ordered by descending accumulated TF-IDF score, ties broken
     *         by ascending document id; empty when no document matches any term
     */
    public List<Integer> search(String query) {
        List<Integer> result = new ArrayList<>();
        for (Map.Entry<Integer, Double> entry : rankByScore(scoreDocuments(query))) {
            result.add(entry.getKey());
        }
        return result;
    }

    /**
     * Prints, for each term of the query, the {@code k} highest-scoring documents for
     * that term alone to standard output.
     *
     * <p>A term without matching documents prints a "No documents found." line. A
     * document id that is indexed but absent from {@code documentStore} is printed as
     * a placeholder instead of aborting the listing.
     *
     * @param query         raw query text; it is tokenized into terms
     * @param k             maximum number of documents to print per term; values of
     *                      zero or less print only the header line
     * @param documentStore lookup from document id to the document whose content is shown
     */
    public void printTopKPerTerm(
            String query,
            int k,
            Map<Integer, Document> documentStore
    ) {
        for (String term : Tokenizer.tokenize(query)) {
            System.out.println("\nTop " + k + " documents for word: \"" + term + "\"");

            // Calculate TF-IDF for this term in each document
            Map<Integer, Double> termScores = new HashMap<>();
            addTermScores(term, termScores);
            if (termScores.isEmpty()) {
                System.out.println(" No documents found.");
                continue;
            }

            List<Map.Entry<Integer, Double>> ranked = rankByScore(termScores);
            int limit = Math.min(k, ranked.size());
            for (int i = 0; i < limit; i++) {
                Map.Entry<Integer, Double> entry = ranked.get(i);
                Document doc = documentStore.get(entry.getKey());
                // The store may lack a document the index still references; show a
                // placeholder rather than failing with a NullPointerException.
                Object label = (doc != null)
                        ? doc.getContent()
                        : "<missing document " + entry.getKey() + ">";
                System.out.printf(
                        " • %s (score = %.4f)%n",
                        label,
                        entry.getValue()
                );
            }
        }
    }

    /** Tokenizes the query and accumulates every term's scores into one fresh map. */
    private Map<Integer, Double> scoreDocuments(String query) {
        Map<Integer, Double> scores = new HashMap<>();
        for (String term : Tokenizer.tokenize(query)) {
            addTermScores(term, scores);
        }
        return scores;
    }

    /** Adds the TF-IDF score of {@code term} for each document the index lists for it. */
    private void addTermScores(String term, Map<Integer, Double> scores) {
        Map<Integer, Integer> docs = index.getDocuments(term);
        if (docs == null) {
            // Defensive: whether the index can return null for an unknown term is not
            // visible here, so null is treated the same as "no matching documents".
            return;
        }
        for (int docId : docs.keySet()) {
            scores.merge(docId, tfidf.computeTFIDF(term, docId), Double::sum);
        }
    }

    /** Returns the entries of {@code scores} as a new list sorted best-first. */
    private static List<Map.Entry<Integer, Double>> rankByScore(Map<Integer, Double> scores) {
        List<Map.Entry<Integer, Double>> ranked = new ArrayList<>(scores.entrySet());
        ranked.sort(BY_SCORE_DESC);
        return ranked;
    }
}