# scrape.py -- from the JobHunt repository (DevDynamo01/JobHunt, main branch)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Script: open a LinkedIn job-search results page in Chrome, wait for the
# job-card anchors to appear, and print the href of each one as a numbered list.

# Initialize WebDriver (update path if necessary)
driver = webdriver.Chrome()
try:
    # Optional login step, currently disabled.
    # SECURITY: never commit real credentials to source control. If this step
    # is re-enabled, read them from the environment (requires `import os`).
    #
    # # Open LinkedIn login page
    # driver.get("https://www.linkedin.com/login")
    #
    # # Wait for elements to load
    # time.sleep(2)
    #
    # # Locate username and password fields
    # username_field = driver.find_element(By.ID, "username")
    # password_field = driver.find_element(By.ID, "password")
    #
    # # Enter credentials
    # username_field.send_keys(os.environ["LINKEDIN_USERNAME"])
    # password_field.send_keys(os.environ["LINKEDIN_PASSWORD"])
    #
    # # Click the login button
    # login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
    # login_button.click()
    #
    # # Wait for successful login
    # time.sleep(50)

    # Load the job-search results page.
    driver.get("https://www.linkedin.com/jobs/search/?currentJobId=4191319534&distance=25.0&geoId=102713980&keywords=web%20developer&origin=HISTORY")

    # Wait up to 15 seconds for the job-card anchors to be present in the DOM;
    # WebDriverWait.until raises TimeoutException if none show up in time.
    link_elements = WebDriverWait(driver, 15).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'a[class*="job-card-container__link"]'))
    )

    # Read each href exactly once (one WebDriver round-trip per element) and
    # keep only the non-empty values.
    hrefs = (element.get_attribute("href") for element in link_elements)
    links = [href for href in hrefs if href]
finally:
    # Always release the browser and driver process, even when the wait above
    # times out or any other step raises.
    # time.sleep(10)  # uncomment to keep the window open briefly for inspection
    driver.quit()

# Print the collected job links as a 1-based numbered list.
for idx, link in enumerate(links, start=1):
    print(f"{idx}. {link}")
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# import time  # For adding brief pauses during scrolling
#
# # Initialize the browser driver
# driver = webdriver.Chrome()

# Open the webpage
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# import time

# Initialize WebDriver
# driver = webdriver.Chrome()  # Or your preferred browser driver
# driver.get("https://www.linkedin.com/jobs/search/?currentJobId=4189471241&keywords=software%20developer%20jobs&origin=BLENDED_SEARCH_RESULT_NAVIGATION_SEE_ALL&originToLandingJobPostings=4175550821%2C4191375418%2C4189471241")

# time.sleep(10)
#
# # Scroll and Collect Links
# # WebDriverWait(driver, 20).until(
# #     EC.presence_of_element_located((By.CSS_SELECTOR, 'div[data-results-list-top-scroll-sentinel]'))
# # )
# #
# # # Scrollable element
# # scrollable_div = driver.find_element(By.CSS_SELECTOR, 'div.vUCwJIzISzavdDUqUoVFLReVnvJYyjA')
#
# # Scroll and Collect Links
# # links = set()
# # scroll_pause_time = 5
# #
# # # Infinite scroll logic
# # while True:
# #     # Scroll step-by-step using PAGE_DOWN
# #     scrollable_div.send_keys(Keys.PAGE_DOWN)
# #     time.sleep(scroll_pause_time)
#
#     # Collect links
# links=set()
# link_elements = driver.find_elements(By.CSS_SELECTOR, 'a.job-card-container__link')
# for element in link_elements:
#     href = element.get_attribute("href")
#     if href:
#         links.add(href)
#
#     # Break condition: Check for sentinel presence
#     # sentinel = driver.find_elements(By.CSS_SELECTOR, 'div[data-results-list-top-scroll-sentinel]')
#     # if not sentinel:
#     #     break  # Stop scrolling when sentinel disappears
#
# # Display collected links
# for idx, link in enumerate(links, start=1):
#     print(f"{idx}. {link}")
#
# # Close the driver
# driver.quit()