-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMain.java
More file actions
123 lines (93 loc) · 4.43 KB
/
Main.java
File metadata and controls
123 lines (93 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Console entry point for a small keyword search demo.
 *
 * <p>The program builds an {@code InvertedIndex} and a {@code Trie} over a fixed,
 * in-memory set of documents, reads one query line from standard input, expands
 * query tokens that are not indexed words through trie autocompletion, and prints
 * the best-ranked documents with the matched words upper-cased.
 */
public class Main {

    /** Maximum number of ranked documents printed to the console. */
    private static final int TOP_RESULTS = 5;

    /**
     * Runs the demo: index the documents, read a query, expand it, search, print.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {

        // 1. Create Index, Trie, Store
        InvertedIndex index = new InvertedIndex();
        Trie trie = new Trie();
        Map<Integer, Document> documentStore = new HashMap<>();

        // 2. Documents (20)
        List<Document> documents = List.of(
                new Document(1, "Java is a popular programming language"),
                new Document(2, "Java is widely used in backend development"),
                new Document(3, "Backend development requires knowledge of Java and databases"),
                new Document(4, "Python is popular for machine learning"),
                new Document(5, "Java and Python are both object oriented languages"),
                new Document(6, "Spring Boot is a Java framework for backend systems"),
                new Document(7, "C++ is used for competitive programming"),
                new Document(8, "Backend systems require scalability and performance"),
                new Document(9, "Java backend developers often use Spring framework"),
                new Document(10, "Machine learning models are usually written in Python"),
                new Document(11, "Databases play a crucial role in backend engineering"),
                new Document(12, "System design is important for scalable backend applications"),
                new Document(13, "REST APIs are commonly built using Java and Spring"),
                new Document(14, "Microservices architecture improves backend scalability"),
                new Document(15, "Data structures and algorithms are essential for interviews"),
                new Document(16, "Competitive programming improves problem solving skills"),
                new Document(17, "Python is widely used for data science"),
                new Document(18, "Java is strongly typed and object oriented"),
                new Document(19, "Backend developers work with servers and databases"),
                new Document(20, "Spring framework simplifies Java backend development")
        );

        // 3. Index documents + Trie
        for (Document doc : documents) {
            documentStore.put(doc.getId(), doc);
            index.addDocument(doc);
            for (String word : Tokenizer.tokenize(doc.getContent())) {
                trie.insert(word);
            }
        }

        // 4. Search Engine
        SearchEngine engine = new SearchEngine(index);

        // 5. User Input
        // try-with-resources closes the scanner on every exit path, including the
        // early return taken when the query yields no usable keywords.
        try (Scanner sc = new Scanner(System.in)) {
            System.out.println("Enter search query:");
            // nextLine() throws NoSuchElementException when stdin is already at EOF;
            // treat missing input as an empty query instead of crashing.
            String query = sc.hasNextLine() ? sc.nextLine() : "";
            List<String> tokens = Tokenizer.tokenize(query);

            // 6. Prefix Expansion
            // A token that is an indexed word is used as-is; any other token is
            // replaced by whatever the trie suggests for it. LinkedHashSet removes
            // duplicates while keeping first-seen order for the printout below.
            Set<String> expandedWords = new LinkedHashSet<>();
            for (String token : tokens) {
                if (index.containsWord(token)) {
                    expandedWords.add(token);
                } else {
                    expandedWords.addAll(trie.autocomplete(token));
                }
            }

            if (expandedWords.isEmpty()) {
                System.out.println("No valid keywords found.");
                return;
            }

            String expandedQuery = String.join(" ", expandedWords);

            System.out.println("\nExpanded keywords used for search:");
            for (String w : expandedWords) {
                System.out.println("- " + w);
            }

            // 7. Search with the expanded query
            // NOTE(review): search() and getDocumentScores() are both invoked with the
            // same query; whether the engine shares work between them is not visible
            // from this file.
            List<Integer> rankedResults = engine.search(expandedQuery);
            Map<Integer, Double> scoreMap = engine.getDocumentScores(expandedQuery);

            // 8. Print top results
            System.out.println("\nTop " + TOP_RESULTS + " Search Results:\n");

            int rank = 1;
            for (int docId : rankedResults) {
                if (rank > TOP_RESULTS) {
                    break;
                }

                String content = highlight(documentStore.get(docId).getContent(), expandedWords);

                System.out.printf(
                        "%d. %s%n TF-IDF Score: %.4f%n%n",
                        rank,
                        content,
                        scoreMap.getOrDefault(docId, 0.0)
                );
                rank++;
            }

            // rank is still 1 only when the loop body never ran, i.e. no hits.
            if (rank == 1) {
                System.out.println("No relevant documents found.");
            }
        }
    }

    /**
     * Upper-cases every whole-word, case-insensitive occurrence of each search word.
     *
     * <p>Each word is matched literally: it is quoted before being placed in the
     * pattern and its replacement is escaped, so regex metacharacters in a word
     * (for example {@code +}, {@code $} or a backslash) can neither change the
     * pattern nor be read as group references.
     *
     * @param content text to decorate
     * @param words   search words to emphasise; empty words are ignored
     * @return {@code content} with all matches replaced by their upper-case form
     */
    private static String highlight(String content, Iterable<String> words) {
        String highlighted = content;
        for (String word : words) {
            if (word.isEmpty()) {
                continue;
            }
            // Look-arounds instead of \b: they behave the same for ordinary words but
            // still delimit a word whose first or last character is punctuation.
            Pattern wholeWord = Pattern.compile(
                    "(?<!\\w)" + Pattern.quote(word) + "(?!\\w)",
                    Pattern.CASE_INSENSITIVE);
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
            String replacement = Matcher.quoteReplacement(word.toUpperCase(Locale.ROOT));
            highlighted = wholeWord.matcher(highlighted).replaceAll(replacement);
        }
        return highlighted;
    }
}