-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtool.py
More file actions
38 lines (29 loc) · 1.25 KB
/
tool.py
File metadata and controls
38 lines (29 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import csv
import requests
from bs4 import BeautifulSoup
def scrape_headings(url):
    """Fetch *url* and return the set of unique h2/h3/h4 heading texts.

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server does not respond in time.
    """
    # Timeout prevents a hung connection from stalling the whole run;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # A set de-duplicates automatically; no explicit membership check needed.
    return {tag.get_text().strip() for tag in soup.find_all(['h2', 'h3', 'h4'])}
def combine_headings(urls):
    """Scrape every URL in *urls* and return one merged set of unique headings."""
    merged = set()
    for page_url in urls:
        # Set union keeps each heading at most once across all pages.
        merged |= scrape_headings(page_url)
    return merged
def save_to_csv(headings, filename):
    """Write *headings* to *filename* as a one-column CSV under a 'Headings' header.

    Headings are sorted so the output is deterministic even when the
    input is a set (whose iteration order is arbitrary between runs).
    """
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Headings'])
        # One heading per row; clearer than the original zip() trick.
        writer.writerows([heading] for heading in sorted(headings))
def _main():
    """Prompt for URLs, scrape their unique headings, and save them to a CSV."""
    # Strip whitespace so input like "a, b" yields valid URLs; drop empty entries.
    raw = input("Enter the URLs (comma-separated): ")
    article_urls = [url.strip() for url in raw.split(',') if url.strip()]
    combined_headings = combine_headings(article_urls)
    csv_filename = input("Enter the CSV filename to save the headings: ")
    save_to_csv(combined_headings, csv_filename)
    print(f"Headings saved to {csv_filename} successfully.")


if __name__ == '__main__':
    # Guard so importing this module for its functions does not run the prompts.
    _main()