-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLinkCrawler
More file actions
52 lines (39 loc) · 1.63 KB
/
LinkCrawler
File metadata and controls
52 lines (39 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# --- Crawl configuration -----------------------------------------------------
PAUSE_TIME = 1.5  # seconds to sleep after each scroll and before collecting links
CLICK_COUNT = 0  # running number of "Show more results" clicks (progress output)
URL = "https://www.fragrantica.com/search/?godina=2014%3A2014&spol=male"
# Raw string: "\c" is not a valid escape sequence, so the non-raw literal emits
# an invalid-escape warning on modern Python. The resulting value is unchanged.
# NOTE(review): driverPath is never passed to webdriver.Chrome() below -- confirm
# whether it should be wired in or is a leftover.
driverPath = r"D:\chromedriver-win64\chromedriver.exe"

# --- Browser start-up --------------------------------------------------------
# Launch Chrome, open the search page, and wait a fixed 5 seconds before the
# crawl loop starts interacting with the page.
driver = webdriver.Chrome()
driver.get(URL)
time.sleep(5)
# Scroll to the bottom of the page and click the "Show more results" button
# until the button can no longer be found or clicked.
# The loop is capped at 35 rounds for testing; an unbounded `while True` was
# the commented-out alternative.
for _ in range(35):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(PAUSE_TIME)
    try:
        # Only the Selenium calls are guarded, so unrelated programming errors
        # are not mistaken for "no more results".
        show_more_button = driver.find_element(
            By.XPATH, "//button[contains(text(), 'Show more results')]"
        )
        show_more_button.click()
    except WebDriverException as e:
        # Any Selenium failure (button missing, not clickable, ...) ends the
        # loading phase; report it and move on to collecting the links.
        print("예외 발생: ", e)
        break
    else:
        CLICK_COUNT += 1
        print(CLICK_COUNT)  # progress indicator: number of clicks so far
# Collect the href of every result link currently present on the page.
time.sleep(PAUSE_TIME)  # same fixed pause as in the crawl loop before reading the DOM
try:
    elements = driver.find_elements(By.CSS_SELECTOR, ".card-section a")
    data = [element.get_attribute("href") for element in elements]
finally:
    # The browser is not used again after the hrefs are extracted. Quit it even
    # when extraction fails so no Chrome/chromedriver process is left running.
    driver.quit()
print("데이터: ", len(elements), "건")
# Export the collected links to a CSV file whose name includes the link count.
output_name = f"2014_male_{len(data)}.csv"
df = pd.DataFrame(data)
df.to_csv(output_name, index=False)  # index=False: do not write the row index column
print("CSV 파일 생성")
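# Reference: markup of the "Show more results" button that the crawl loop clicks:
#<button class="button">Show more results</button>
# Reference: full CSS selector path for that button (presumably copied from the browser dev tools):
#main-content > div:nth-child(1) > div.small-12.medium-8.large-9.cell > div > div > div > div.off-canvas-content.content1 > div.grid-x.grid-padding-x.grid-padding-y > div > div:nth-child(3) > div > div > div > div > div > button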