Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
jobs.csv
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# frozen_string_literal: true
# Runtime dependencies for the job scraper; install with `bundle install`.
source "https://rubygems.org"

# gem "rails"
# Mechanize is the base class of the search agents in lib/agent.rb.
gem 'mechanize', '~> 2.7', '>= 2.7.5'
# Chronic parses the "posted" date text in lib/job_builder.rb.
gem 'chronic', '~> 0.10.2'
44 changes: 44 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
GEM
remote: https://rubygems.org/
specs:
chronic (0.10.2)
domain_name (0.5.20161021)
unf (>= 0.0.5, < 1.0.0)
http-cookie (1.0.3)
domain_name (~> 0.5)
mechanize (2.7.5)
domain_name (~> 0.5, >= 0.5.1)
http-cookie (~> 1.0)
mime-types (>= 1.17.2)
net-http-digest_auth (~> 1.1, >= 1.1.1)
net-http-persistent (~> 2.5, >= 2.5.2)
nokogiri (~> 1.6)
ntlm-http (~> 0.1, >= 0.1.1)
webrobots (>= 0.0.9, < 0.2)
mime-types (3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2016.0521)
mini_portile2 (2.1.0)
net-http-digest_auth (1.4)
net-http-persistent (2.9.4)
nokogiri (1.6.8.1)
mini_portile2 (~> 2.1.0)
nokogiri (1.6.8.1-x86-mingw32)
mini_portile2 (~> 2.1.0)
ntlm-http (0.1.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.2)
unf_ext (0.0.7.2-x86-mingw32)
webrobots (0.1.2)

PLATFORMS
ruby
x86-mingw32

DEPENDENCIES
chronic (~> 0.10.2)
mechanize (~> 2.7, >= 2.7.5)

BUNDLED WITH
1.13.6
27 changes: 27 additions & 0 deletions bin/executable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
require_relative '../lib/launch.rb'

# Entry point: searches Dice and Indeed for "rails" jobs, converts the result
# nodes into Job structs and appends them to jobs.csv.

# Callback assigned to each agent's history_added hook; it sleeps for half a
# second (presumably to throttle requests -- confirm against Mechanize docs).
throttle = proc { sleep 0.5 }

dice_agent = DiceAgent.new { |agent| agent.user_agent_alias = 'Windows Chrome' }
dice_agent.history_added = throttle
dice_builder = DiceBuilder.new

indeed_agent = IndeedAgent.new { |agent| agent.user_agent_alias = 'Windows Chrome' }
indeed_agent.history_added = throttle
indeed_builder = IndeedBuilder.new

saver = JobSaver.new

# A single pass is performed. To poll continuously, wrap everything below in
# `loop do ... end` with a `sleep(60)` at the end of each iteration.
dice_results = dice_agent.search("rails")
dice_jobs = dice_builder.build_jobs(dice_results)

indeed_results = indeed_agent.search("rails", location: "Texas")
indeed_jobs = indeed_builder.build_jobs(indeed_results)

[dice_jobs, indeed_jobs].each { |batch| saver.save("jobs.csv", batch) }

puts "Saving #{indeed_jobs.length + dice_jobs.length} jobs."
24 changes: 24 additions & 0 deletions lib/agent.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# One scraped job posting. The builders in job_builder.rb fill the fields
# positionally in this order, and JobSaver writes them out as one CSV row.
Job = Struct.new(:title, :job_link, :employer, :location, :company_id, :job_id, :posted)

# Mechanize agent that runs keyword searches against dice.com.
class DiceAgent < Mechanize
  # Fetches the Dice results page for +term+.
  #
  # term - search keywords as a String; may contain spaces or any other
  #        characters, they are form-encoded before being put in the URL.
  #
  # Returns whatever page.search yields for the ".complete-serp-result-div"
  # selector, i.e. the nodes holding the individual results.
  def search(term)
    # Form-encoding turns spaces into "+" (as before) and also escapes
    # characters such as "&", "#" or "+" that would otherwise corrupt the
    # query string.
    query = URI.encode_www_form_component(term)
    page = get("https://www.dice.com/jobs?q=#{query}")
    page.search(".complete-serp-result-div")
  end
end

# Mechanize agent that runs keyword searches against indeed.com.
class IndeedAgent < Mechanize
  # Fetches the Indeed results page for +term+.
  #
  # term    - search keywords as a String.
  # options - Hash of optional settings:
  #           :location - place name sent as the "l" query parameter; when
  #                       absent an empty "l=" is sent, as before.
  #
  # Returns whatever page.search yields for the ".result" selector, i.e. the
  # nodes holding the individual results.
  def search(term, options = {})
    # encode_www_form escapes both values, so multi-word or punctuated terms
    # and locations (e.g. "New York, NY") produce a valid query string.
    query = URI.encode_www_form(q: term, l: options[:location].to_s)
    page = get("http://www.indeed.com/jobs?#{query}")
    page.search(".result")
  end
end


39 changes: 39 additions & 0 deletions lib/job_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Base class for the site-specific builders. Subclasses supply #build, which
# converts a single search-result node into a Job.
class JobBuilder < Mechanize
  # Runs #build over every element of +jobs+.
  #
  # jobs - enumerable of result nodes as returned by an agent's #search.
  #
  # Returns an Array with one #build result per element.
  def build_jobs(jobs)
    jobs.map do |job_div|
      build(job_div)
    end
  end
end

# Builds Job structs from Dice search-result nodes.
class DiceBuilder < JobBuilder
  # Extracts the job fields from one Dice result node.
  #
  # job_div - a result node as returned by DiceAgent#search.
  #
  # Returns a Job. The company and job IDs are taken from the 7th and 8th
  # "/"-separated segments of the job link; the job ID has any "?query"
  # suffix removed.
  def build(job_div)
    title_anchor = job_div.search("h3 a")

    title = title_anchor.attr("title").value
    employer = job_div.search("li.employer .hidden-xs").attr("title").value
    location = job_div.search("li.location").attr("title").value

    job_link = title_anchor.attr('href').value
    company_id, job_segment = job_link.split("/").values_at(6, 7)
    job_id = job_segment.split("?").first

    # The "posted" text is handed to Chronic as-is for date parsing.
    posted = Chronic.parse(job_div.search("li.posted").text)

    Job.new(title, job_link, employer, location, company_id, job_id, posted)
  end
end

# Builds Job structs from Indeed search-result nodes.
class IndeedBuilder < JobBuilder
  # Extracts the job fields from one Indeed result node.
  #
  # job_div - a result node as returned by IndeedAgent#search.
  #
  # Returns a Job with a nil company_id. +posted+ is the String
  # "Before <date 30 days ago>" when the listing shows "30+ days ago",
  # otherwise whatever Chronic.parse makes of the date text.
  def build(job_div)
    first_anchor = job_div.search("a").first
    title = first_anchor.attr("title")
    job_link = first_anchor.attr("href")

    employer = job_div.search(".company").text.strip
    location = job_div.search(".location").text.strip
    job_id = job_div.attr("id")

    # Chronic receives the raw (unstripped) text; only the comparison below
    # uses the stripped form.
    raw_date = job_div.search(".date").text
    posted =
      if raw_date.strip == "30+ days ago"
        "Before #{Date.today - 30}"
      else
        Chronic.parse(raw_date)
      end

    Job.new(title, job_link, employer, location, nil, job_id, posted)
  end
end
34 changes: 34 additions & 0 deletions lib/job_saver.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Persists Job records to a CSV file, one row per job, skipping any job whose
# ID (sixth column) is already stored.
class JobSaver
  # Appends +jobs+ to the CSV file at +path+.
  #
  # The header row is written only when the file is new or empty. A job is
  # skipped when its job_id is already in the file or appeared earlier in the
  # same batch. The file is closed before the method returns.
  #
  # path - String path of the CSV file; created if missing.
  # jobs - enumerable of objects responding to title, job_link, employer,
  #        location, company_id, job_id and posted.
  #
  # Returns +jobs+.
  def save(path, jobs)
    needs_header = !File.file?(path) || File.zero?(path)
    # Read the stored IDs once up front; re-reading the file per job would
    # both be quadratic and miss rows still sitting in the write buffer.
    known_ids = needs_header ? {} : stored_job_ids(path)

    CSV.open(path, 'a') do |file|
      add_header(file) if needs_header
      jobs.each do |job|
        next if known_ids.key?(job.job_id)

        file << job_row(job)
        known_ids[job.job_id] = true
      end
    end
    jobs
  end

  # Writes the column-title row to the open CSV +file+.
  def add_header(file)
    file << ["Title", "Job Link", "Employer", "Location", "Company ID", "Job ID", "Posted at"]
  end

  # Writes +job+ to the open CSV +file+ unless the file on disk at +path+
  # already holds a row with the same job ID. Rows not yet flushed to disk
  # are not seen by this check.
  def add_job(file, job, path)
    file << job_row(job) unless contains?(path, job)
  end

  # Returns true when some row of the CSV at +path+ has +job+'s ID in its
  # sixth column, false otherwise.
  def contains?(path, job)
    CSV.foreach(path).any? { |row| row[5] == job.job_id }
  end

  private

  # Field values of +job+ in CSV column order.
  def job_row(job)
    [job.title, job.job_link, job.employer, job.location, job.company_id, job.job_id, job.posted]
  end

  # Hash whose keys are the sixth-column values of every row in +path+.
  def stored_job_ids(path)
    ids = {}
    CSV.foreach(path) { |row| ids[row[5]] = true }
    ids
  end
end
8 changes: 8 additions & 0 deletions lib/launch.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
require 'rubygems'
require 'mechanize'
require 'chronic'
require 'csv'
require 'date'
require 'uri'
require_relative 'agent.rb'
require_relative 'job_builder.rb'
require_relative 'job_saver.rb'