-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.rb
More file actions
64 lines (52 loc) · 1.55 KB
/
index.rb
File metadata and controls
64 lines (52 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env ruby
require "set"
require_relative "const"
# Builds a keyword index over a text file: for every keyword, the list of
# 1-based line numbers on which it appears (case-insensitively).
#
# Keywords are read from a file containing one keyword per line. The text is
# tokenized with Const::WORD_REGEX, which is defined outside this file.
#
# NOTE: the instance method #print shadows Kernel#print inside this class.
class Index
  # @param input_filename [String] path of the text file to index
  # @param keywords_filename [String] path of the file listing one keyword per line
  def initialize(input_filename, keywords_filename)
    @keywords = get_keywords(keywords_filename)
    # Sorted Array is kept for ordered output; the Set gives O(1) membership tests.
    @keyword_set = Set.new(@keywords)
    @index = create_index(input_filename)
  end

  # Reads the keyword list from +filename+.
  #
  # Each line is stripped of surrounding whitespace and downcased. Blank lines
  # are ignored so that a trailing newline does not create an empty keyword.
  #
  # @param filename [String] path of the keywords file
  # @return [Array<String>] unique keywords, sorted
  def get_keywords(filename)
    File.foreach(filename)
        .map { |line| line.strip.downcase }
        .reject(&:empty?)
        .uniq
        .sort
  end

  # Scans +filename+ line by line and records, for each keyword found, the
  # line numbers where it occurs.
  #
  # Buckets are Arrays so that line numbers stay in ascending order of
  # appearance; a line number is stored at most once per keyword.
  #
  # @param filename [String] path of the text file to index
  # @return [Hash{String => Array<Integer>}] downcased keyword => line numbers
  def create_index(filename)
    index = Hash.new { |hash, key| hash[key] = [] }

    File.foreach(filename).with_index(1) do |line, line_number|
      # De-duplicate per line up front, so no second pass over the buckets is needed.
      line.scan(Const::WORD_REGEX).map(&:downcase).uniq.each do |word|
        index[word] << line_number if keyword?(word)
      end
    end

    index
  end

  # Returns the line numbers on which +word+ appears.
  #
  # The lookup is case-insensitive, matching how keywords and index entries
  # are stored.
  #
  # @param word [String] keyword to look up
  # @return [Array<Integer>] line numbers (empty if the keyword never occurs)
  # @raise Const::NOT_KEYWORD when +word+ is not one of the keywords (a String
  #   constant raises RuntimeError with that message; an exception class
  #   raises an instance of it)
  def where_appears(word)
    key = word.to_s.downcase
    raise Const::NOT_KEYWORD unless keyword?(key)

    # fetch avoids inserting an empty bucket as a side effect of a read.
    @index.fetch(key, [])
  end

  # Writes one line per keyword, in sorted order, to standard output:
  # the keyword, a space, then its line numbers joined by ", ".
  def print
    @keywords.each do |word|
      puts "#{word} #{@index.fetch(word, []).join(', ')}"
    end
  end

  private

  # True when +word+ (already downcased) is one of the known keywords.
  def keyword?(word)
    # Fall back to building the Set if create_index is called before
    # initialize has populated it.
    @keyword_set ||= Set.new(@keywords)
    @keyword_set.include?(word)
  end
end