-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path#Xpath.py
More file actions
65 lines (46 loc) · 1.74 KB
/
#Xpath.py
File metadata and controls
65 lines (46 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#Xpath
#Scrapes football headlines from The Sun and saves them to a dated CSV file.
#Selenium must be installed before running this script (pip install selenium).
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import pandas as pd
from datetime import datetime
import os
import sys

#Directory that holds the running executable; the CSV is written next to it.
#NOTE: when run as a plain script (not frozen into an executable),
#sys.executable is the Python interpreter, so this is the interpreter's folder.
application_path = os.path.dirname(sys.executable)

#Date stamp (MMDDYYYY) so each day's run produces its own file
now = datetime.now()
month_day_year = now.strftime("%m%d%Y")

website = "https://www.thesun.co.uk/sport/football"
#path to the chromedriver binary
path = "/users/hugoseguin/downloads/chromedriver"

#headless mode: passed as a Chrome argument because the `headless`
#attribute on Options was removed in recent Selenium 4 releases
options = Options()
options.add_argument("--headless")

#Create a driver
service = Service(executable_path=path)
driver = webdriver.Chrome(service=service, options=options)

titles = []
subtitles = []
links = []
try:
    driver.get(website)
    #`by` says how we locate the elements, `value` is the locator itself
    containers = driver.find_elements(by="xpath", value='//div[@class="teaser__copy-container"]')
    for container in containers:
        #The leading "./" makes each XPath relative to the current container
        title = container.find_element(by="xpath", value='./a/h2').text
        subtitle = container.find_element(by="xpath", value='./a/p').text
        link = container.find_element(by="xpath", value='./a').get_attribute("href")
        titles.append(title)
        subtitles.append(subtitle)
        links.append(link)
finally:
    #Always close the browser, even if the page load or a lookup fails
    driver.quit()

#One column per list; the three lists always have the same length
my_dict = {'titles': titles, 'subtitle': subtitles, 'link': links}
df_headlines = pd.DataFrame(my_dict)
file_name = f'headline-{month_day_year}.csv'
final_path = os.path.join(application_path, file_name)
df_headlines.to_csv(final_path)
#crontab
#"0 9 * * *" to run at 9 am every day
#then add the path of the executable, so cron knows what to run
#press Esc to leave insert mode and type :wq to write and quit the crontab
#crontab -e edits the schedule that tells the operating system to run the executable at 9 am every day; crontab -l lists it