Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/.bundle/
/.yardoc
/Gemfile.lock
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--color
--require spec_helper
--format documentation
8 changes: 8 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Gems are resolved against the canonical RubyGems host.
source "https://rubygems.org"

# Specify your gem's dependencies in pingpongpairing.gemspec
gem "rspec",       "~> 3.5.0"
gem "guard",       "~> 2.14"
gem "guard-shell", "~> 0.7.1"
gem "guard-rspec", "~> 4.7.3"
gem "mechanize"
22 changes: 22 additions & 0 deletions Guardfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Re-run the matching spec whenever a watched file changes.
guard :rspec, cmd: "bundle exec rspec" do
  # watch /lib/ files
  # NOTE: dots are escaped so the pattern matches a literal ".rb"
  # suffix rather than any character before "rb".
  watch(%r{^lib/(.+)\.rb$}) do |m|
    "spec/#{m[1]}_spec.rb"
  end

  watch(%r{^lib/gameboard/(.+)\.rb$}) do |m|
    "spec/#{m[1]}_spec.rb"
  end

  # watch /spec/ files
  watch(%r{^spec/(.+)\.rb$}) do |m|
    "spec/#{m[1]}.rb"
  end
end

guard :shell do
  watch %r{^pair/.*\.rb$} do |m|
    # `n` is Guard's notification helper: pop a "Changed" notice,
    # then push the changed file to the local webhook server.
    n m[0], 'Changed'
    `bundle exec exe/sendevent "http://localhost:5000" #{m[0]}`
  end
end
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
Bottom feeding your way across the web

[A Ruby-based web scraping and parsing project which uses Nokogiri, Mechanize, and OpenURI. From the Viking Code School.](http://www.vikingcodeschool.com)


John Sawyer
16 changes: 16 additions & 0 deletions exe/parallel
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Run every argument as its own command, all in parallel, then wait for
# each one to finish.  Example: parallel "cmd one" "cmd two"

# $cmd is deliberately left unquoted so multi-word arguments (e.g.
# "bundle exec guard start -i") split into a command plus its options.
for cmd in "$@"; do {
echo "Process \"$cmd\" started";
$cmd & pid=$!
PID_LIST+=" $pid";
} done

# Forward Ctrl-C to every child so one interrupt stops the whole group.
trap "kill $PID_LIST" SIGINT

echo "Parallel processes have started";

# Block until every backgrounded PID has exited.
wait $PID_LIST

echo
echo "All processes have completed";
3 changes: 3 additions & 0 deletions exe/pingpong
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Launch the ngrok tunnel, the webhook server, and guard side by side
# via the sibling `parallel` helper script.
# $0 and the derived directory are quoted so the script still works
# when installed under a path containing spaces; $(...) replaces the
# legacy backtick command substitution.
my_dir=$(dirname "$0")
"$my_dir/parallel" "$my_dir/tunnel" "$my_dir/webhook" "bundle exec guard start -i"
24 changes: 24 additions & 0 deletions exe/sendevent
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env ruby
# POST the contents of a local file to a webhook endpoint as JSON.
#
# Usage: sendevent BASE_URL FILE
#   Sends {filename:, data:} to "#{BASE_URL}/event".
require "net/http"
require "uri"
require "json"

abort("Usage: sendevent BASE_URL FILE") unless ARGV.length == 2

url = "#{ARGV[0]}/event"
file_to_serialize = ARGV[1]
# File.read opens, reads, and closes in one call — no leaked handle.
contents = File.read(file_to_serialize)

new_json = {
  filename: File.basename(file_to_serialize),
  data: contents
}

req_body = JSON.generate(new_json)
uri = URI.parse(url)
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
request.body = req_body
http.request(request)
2 changes: 2 additions & 0 deletions exe/tunnel
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Expose local port 5000 (the webhook server's port) through an ngrok
# HTTP tunnel so external services can reach it.
ngrok http 5000
18 changes: 18 additions & 0 deletions exe/webhook
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env ruby
# Minimal webhook receiver: accepts JSON POSTs on /event and writes the
# payload's "data" into spec/<filename> so guard-rspec picks it up.
require 'sinatra'
require 'json'

set :port, 5000

post '/event' do
  status 204 #successful request with no body content

  request.body.rewind
  p request_payload = JSON.parse(request.body.read)

  # SECURITY: the filename comes from an untrusted HTTP payload.
  # File.basename strips any directory components so a malicious value
  # like "../../etc/passwd" cannot escape the spec/ directory.
  safe_name = File.basename(request_payload["filename"].to_s)

  #append the payload to a file
  File.open("spec/#{safe_name}", "w") do |f|
    f.puts(request_payload["data"])
  end
end

851 changes: 851 additions & 0 deletions jobs_2016_11_14.csv

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions lib/job_search.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
require_relative 'parse_dice'
require_relative 'parse_indeed'
require 'csv'

# Orchestrates the per-site scrapers and dumps the combined results to a
# dated CSV file in the working directory.
class JobSearch

  # Kick off every scraper for the given term/location and write
  # whatever they find straight to CSV.
  def initialize(term, location = nil)
    scrapers = [ParseDice.new(term, location), ParseIndeed.new(term, location)]
    to_csv(run_scrapers(scrapers))
  end

  # Run each scraper's #search and merge all hits into one flat array.
  def run_scrapers(scrapers)
    scrapers.map(&:search).flatten
  end

  # Write one row per unique non-nil job hash, preceded by a header row.
  def to_csv(results)
    stamp = Time.now.strftime("%Y_%m_%d")
    CSV.open("jobs_#{stamp}.csv", "w+") do |csv|
      csv << ["Title", "Link", "Description", "Company", "Company Site", "Location", "Date"]
      results.compact.uniq.each do |job|
        csv << [job[:title], job[:link], job[:desc], job[:company], job[:company_url], job[:location], job[:date]]
      end
    end
  end

end


dice = JobSearch.new( 'Developer', '33613' )
38 changes: 38 additions & 0 deletions lib/parse_dice.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
require_relative 'web_scraper'

# Scrapes full-time job listings from dice.com search result pages.
class ParseDice < WebScraper

  # term:     search keyword(s)
  # location: optional zip/city, expanded to a 30-mile radius filter
  def initialize(term, location = nil)
    @agent = Mechanize.new
    @term = term
    @location = "-l-#{location}-radius-30" if location
  end

  # Absolute URL for one page of full-time results (120 per page).
  def build_url(page)
    "https://www.dice.com/jobs/jtype-Full\%20Time-q-#{@term}#{@location}-startPage-#{page}-limit-120-jobs.html"
  end

  # Turn one fetched results page into an array of job hashes.
  def organize(results)
    results.search("div.serp-result-content").map do |listing|
      heading  = listing.search("h3")[0]
      employer = listing.search("li.employer .hidden-xs a")[0]
      posted   = listing.search("li.posted")[0].text.strip
      posted   = calculate_date(posted).strftime("%Y_%m_%d %l %p") if posted
      #job_id = result.search("input")[0]
      {
        title: heading.text.strip,
        link: heading.search("a")[0]["href"],
        desc: listing.search("div.shortdesc")[0].text.strip,
        company: employer.text.strip,
        company_url: employer["href"],
        location: listing.search("li.location")[0].text.strip,
        date: posted
      }
    end
  end


end

52 changes: 52 additions & 0 deletions lib/parse_indeed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
require_relative 'web_scraper'

# Scrapes full-time job listings from indeed.com search result pages.
class ParseIndeed < WebScraper

  # term:     search keyword(s)
  # location: optional zip/city appended as the &l= query parameter
  def initialize(term, location = nil)
    @agent = Mechanize.new
    @term = term
    @location = "&l=#{location}" if location
  end

  # Absolute URL for one page of full-time results (Indeed pages by 10s).
  def build_url(page)
    "http://www.indeed.com/jobs?q=#{@term}#{@location}&jt=fulltime&start=#{(page-1)*10}"
  end

  # Parse one results page; returns [] once pagination runs out.
  def organize(results)
    nav_markers = results.search("span.np")
    first_marker = nav_markers[0]
    if first_marker && first_marker.text.strip == "« Previous"
      # When the first nav marker is "Previous", a second marker
      # ("Next") must also exist or this was the last page.
      return [] unless nav_markers[1]
    end
    parse_results(results.search("div.result"))
  end

  # Build one job hash per listing element.
  def parse_results(results)
    results.map do |listing|
      heading = listing.search("h2.jobtitle")[0]
      if heading
        title = heading.text.strip
        link = heading.search("a")[0]["href"]
      else
        # Some listings render the title as a bare anchor, not an h2.
        anchor = listing.search("a.jobtitle")[0]
        title = anchor.text.strip
        link = anchor["href"]
      end
      employer = listing.search("span.company")[0]
      employer_anchor = employer.search("a")[0]
      posted = listing.search("span.date")[0].text.strip
      posted = calculate_date(posted).strftime("%Y_%m_%d %l %p") if posted
      #job_id = result.search("input")[0]
      {
        title: title,
        link: link,
        desc: listing.search("span.summary")[0].text.strip,
        company: employer.text.strip,
        company_url: employer_anchor && employer_anchor["href"],
        location: listing.search("span.location")[0].text.strip,
        date: posted
      }
    end
  end

end
47 changes: 47 additions & 0 deletions lib/web_scraper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
require 'mechanize'
require 'active_support/core_ext/numeric/time'

# Base class for the per-site scrapers.  Subclasses must implement
# #build_url(page) and #organize(results); #search then walks every
# results page and accumulates the parsed job hashes.
class WebScraper

  attr_reader :agent

  def initialize
    @agent = Mechanize.new
  end

  # Subclass hook: turn one fetched page into an array of job hashes.
  # BUGFIX: `raise NotImplementedError("...")` called the class like a
  # method and raised NoMethodError; `raise Class, msg` is the intent.
  def organize(_results)
    raise NotImplementedError, "Methods Need to be defined by subclass"
  end

  # Subclass hook: absolute URL of the given results page number.
  def build_url(_page)
    raise NotImplementedError, "Methods Need to be defined by subclass"
  end

  # Fetch pages starting at `page` until one comes back empty,
  # accumulating every job hash.
  # BUGFIX: always return the results array — previously an empty page
  # made the trailing `if` modifier the last expression, so the method
  # returned nil instead of the collected results.
  def search(page = 1)
    sleep(1) # throttle so we don't hammer the job site
    puts build_url(page)
    results = organize(agent.get(build_url(page)))
    results.concat(search(page + 1)) unless results.empty?
    results
  end

  # Convert a relative date string ("3 hours ago", "2 weeks ago", ...)
  # into a Time via ActiveSupport's duration helpers.
  # BUGFIX: phrases with no recognized unit ("Just posted", "Today")
  # used to return nil, and both subclasses immediately call #strftime
  # on the result — now they fall back to Time.now.
  def calculate_date(date_string)
    amount, unit = date_string.split(" ")
    amount = amount.to_i
    case unit
    when /minute/ then amount.minutes.ago
    when /hour/   then amount.hours.ago
    when /day/    then amount.days.ago
    when /week/   then amount.weeks.ago
    when /month/  then amount.months.ago
    else Time.now # unrecognized phrase: treat as "posted just now"
    end
  end


end


Loading