-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrss_reader.py
More file actions
121 lines (97 loc) · 4.18 KB
/
rss_reader.py
File metadata and controls
121 lines (97 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from argparse import ArgumentParser
from typing import Optional, Sequence
import requests
import xml.etree.ElementTree as ET
import json as json
class UnhandledException(Exception):
    """Error raised by the RSS reader when parsing or processing a feed fails."""
def rss_parser(
xml: str,
limit: int | None,
json_mode: bool = False,
) -> list[str]:
try:
root = ET.fromstring(xml)
channel = root.find("channel")
if channel is None:
raise ValueError("Invalid RSS feed format")
channel_data = {
"title":channel.findtext("title"),
"link":channel.findtext("link"),
"lastBuildDate":channel.findtext("lastBuildDate"),
"pubDate":channel.findtext("pubDate"),
"managinEditor":channel.findtext("managinEditor"),
"description":channel.findtext("description"),
}
channel_data = {k: v for k, v in channel_data.items() if v}
items_data: list[dict[str, str]] = []
items = channel.findall("item")
for item in items[:limit] if limit else items:
item_data = {
"title": item.findtext("title"),
"author": item.findtext("author"),
"pubDate": item.findtext("pubDate"),
"link": item.findtext("link"),
"category": item.findtext("category"),
"description": item.findtext("description"),
}
item_data = {k: v for k, v in item_data.items() if v}
items_data.append(item_data)
channel_data |= {"items":items_data}
if json_mode:
return [json.dumps(channel_data, indent=4)]
transformed_output = []
if channel_data.get("title"):
transformed_output.append(f"Feed: {channel_data['title']}")
if channel_data.get("link"):
transformed_output.append(f"Link: {channel_data['link']}")
if channel_data.get("lastBuildDate"):
transformed_output.append(f"Last Build Date: {channel_data['lastBuildDate']}")
if channel_data.get("pubDate"):
transformed_output.append(f"Publish Date: {channel_data['pubDate']}")
if channel_data.get("language"):
transformed_output.append(f"Language: {channel_data['language']}")
if channel_data.get("managingEditor"):
transformed_output.append(f"Editor: {channel_data['managingEditor']}")
if channel_data.get("description"):
transformed_output.append(f"Description: {channel_data['description']}")
for item in channel_data["items"]:
if item.get("title"):
transformed_output.append(f"Title: {item['title']}")
if item.get("author"):
transformed_output.append(f"Author: {item['author']}")
if item.get("pubDate"):
transformed_output.append(f"Published: {item['pubDate']}")
if item.get("link"):
transformed_output.append(f"Link: {item['link']}")
if item.get("category"):
transformed_output.append(f"Categories: {item['category']}")
if item.get("description"):
transformed_output.append(item["description"])
return transformed_output
except ET.ParseError as e:
raise UnhandledException(f"Failed to parse XML: {e}")
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the command-line RSS reader.

    Parses the command line, downloads the feed from the given URL, and
    prints the parsed result to stdout (as text, or as JSON with ``--json``).

    Args:
        argv: Command-line arguments to parse; ``None`` lets argparse read
            them from ``sys.argv``.

    Returns:
        ``0`` on success.

    Raises:
        UnhandledException: If the source URL is missing, the download fails
            or returns an HTTP error status, or the feed cannot be parsed.
            The original error is attached as ``__cause__``.
    """
    parser = ArgumentParser(
        prog="rss_reader",
        description="Pure Python command-line RSS reader.",
    )
    parser.add_argument("source", help="RSS URL", type=str, nargs="?")
    parser.add_argument(
        "--json", help="Print result as JSON in stdout", action="store_true"
    )
    parser.add_argument(
        "--limit", help="Limit news topics if this parameter provided", type=int
    )

    # A browser User-Agent is sent with the request instead of the library default.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    }

    args = parser.parse_args(argv)

    try:
        # "source" is optional on the command line (nargs="?"), so it can be None.
        if not args.source:
            raise ValueError("No RSS source URL was provided")

        # Without a timeout requests may block forever on an unresponsive host.
        response = requests.get(args.source, headers=headers, timeout=10)
        # Fail on 4xx/5xx instead of handing an error page to the XML parser.
        response.raise_for_status()

        print("\n".join(rss_parser(response.text, args.limit, args.json)))
        return 0
    except Exception as e:
        # Top-level boundary: surface every failure as UnhandledException while
        # keeping the original exception chained for debugging.
        raise UnhandledException(e) from e
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()