-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFetchData.rb
More file actions
executable file
·45 lines (38 loc) · 827 Bytes
/
FetchData.rb
File metadata and controls
executable file
·45 lines (38 loc) · 827 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
require 'json'
require 'open-uri'
require 'set'

require 'nokogiri'
# Fetch the front-page listing as JSON and collect the story URLs to scrape.
#
# URI.parse(...).read uses open-uri explicitly. The bare open("http://...")
# form relied on open-uri patching Kernel#open, which was deprecated in
# Ruby 2.7 and removed in 3.0.
page = JSON.parse(URI.parse("http://api.ihackernews.com/page").read)

# Keep only absolute http(s) URLs. The check is anchored at the start of the
# string so that a value which merely contains "http" somewhere (a relative
# path, or a "|command" string) is never handed to the downloader.
# to_s lets nil / non-string "url" values fall through as non-matches.
links = page["items"].map { |item| item["url"] }.select do |url|
  url.to_s.start_with?("http://", "https://")
end
puts links
# Load the dictionary of words to count, one word per line of words.txt.
#
# File.readlines opens, reads and closes the file in one call, so no handle
# is left open. chomp strips only a trailing line terminator; chop would
# also eat the last letter of a final line that has no trailing newline.
wordlist = File.readlines("words.txt").map(&:chomp)
# Download every collected link and accumulate the text nodes of each page
# into `result`. A page that fails to download or parse is reported and
# skipped so that one bad link does not abort the whole run.
result = ""
links.each do |link|
  puts "Opening #{link}"
  begin
    # URI.parse(link).open only ever goes through open-uri: a string that is
    # not a fetchable URL raises here (and is rescued below) instead of being
    # treated as a local path or "|command" pipe by Kernel#open. The block
    # form closes the downloaded stream as soon as the page is parsed.
    page_text = URI.parse(link).open do |io|
      Nokogiri::HTML(io).xpath("//text()").to_s
    end
    # Append in place (no copy of the whole corpus per page). The trailing
    # space stops the last word of this page fusing with the first word of
    # the next one when the text is split into words later.
    result << page_text << " "
    # Progress dump: prints everything accumulated so far, not just this page.
    puts result
  rescue StandardError => e
    # StandardError covers network, HTTP, URI and parser failures while still
    # letting Interrupt (Ctrl-C), SystemExit and NoMemoryError propagate.
    puts e
  end
end
# Count how many times each dictionary word occurs in the scraped text.
# `out` maps word => occurrence count; words never seen are absent
# (the hash default of 0 is only used while incrementing).
#
# The dictionary is copied into a Set once so each membership test is a hash
# lookup rather than a linear scan of the whole word list per token.
vocabulary = Set.new(wordlist)
out = result.split(" ").each_with_object(Hash.new(0)) do |token, counts|
  counts[token] += 1 if vocabulary.include?(token)
end
# Rank the counted words from most to least frequent and write the top 500
# (or fewer, if fewer were found) to all_words.txt, one word per line.
puts "finally sorting"
# sort_by on a Hash yields [word, count] pairs; reverse gives descending counts.
out = out.sort_by { |_word, count| count }.reverse
puts "Writing to file"
File.open("all_words.txt", 'w') do |file|
  # first(500) returns at most 500 pairs, so a run that matched fewer than
  # 500 distinct words simply writes a shorter file instead of raising on a
  # nil entry part-way through.
  out.first(500).each { |word, _count| file.puts(word) }
end