| name | Onliner |
|---|
| size | 1 |
|---|
| title | - Беларусь с 1 июля вводит безвизовый въезд для граждан Польши
|
|---|
| pubDate | - Thu, 30 Jun 2022 19:26:28 +0300
|
|---|
| description | - С 1 июля по 31 декабря Беларусь вводит безвизовый режим для граждан Польши. Об этом говорится в телеграм-канале Пограничного комитета.Читать далее…
|
|---|
| link | - https://people.onliner.by/2022/06/30/belarus-s-1-iyulya-vvodit-bezvizovyj-vezd-dlya-grazhdan-polshi
|
|---|
\ No newline at end of file
diff --git a/printer.py b/printer.py
new file mode 100644
index 00000000..7a38ec58
--- /dev/null
+++ b/printer.py
@@ -0,0 +1,16 @@
+from loguru import logger
+
+
+class Printer:
+    """Render feed data (name, size, title, pubDate, description, link) as plain text."""
+
+    def __init__(self, data: dict) -> None:
+        logger.debug("Data is available (debug)!")
+        self.data = data
+
+    def __str__(self) -> str:
+        """Return the feed name, then one text block per item for the first ``size`` items."""
+        header, feed = self.data["name"] + '\n', self.data
+        blocks = [f'{feed["title"][n]}\n{feed["pubDate"][n]}\n\n{feed["description"][n]}\n\n'
+                  f'{feed["link"][n]}\n\n---------------\n\n' for n in range(feed["size"])]
+        return header + ''.join(blocks)
diff --git a/reader.py b/reader.py
new file mode 100644
index 00000000..fb61345a
--- /dev/null
+++ b/reader.py
@@ -0,0 +1,71 @@
+from loguru import logger
+import requests
+from bs4 import BeautifulSoup
+import re
+import sys
+
+
+class Reader:
+    """Fetch an RSS feed and expose its items as equally indexed lists of strings."""
+
+    def __init__(self, source: str, limit=-1) -> None:
+        """Download ``source`` once and parse up to ``limit`` items (-1 or too large: all)."""
+        self.version = '4.0'
+        self.source = source
+        # Unpack a single get_acces() call; every call performs its own HTTP request.
+        self.name, self.items = self.get_acces()
+        logger.info("Acces is available (info)!")
+        self.limit = len(self.items) if limit == -1 or limit > len(self.items) else limit
+        self.title = self.get_title()
+        logger.info("Title is available (info)!")
+        self.pubDate = self.get_pubDate()
+        logger.info("PubDate is available (info)!")
+        self.link = self.get_link()
+        logger.info("Link is available (info)!")
+        self.description = self.get_description()  # also sets self.clear_description
+        logger.info("Description is available (info)!")
+
+    def get_acces(self) -> tuple:
+        """Return ``(feed_title, item_tags)``; print a message and exit(1) on any failure."""
+        logger.debug("Get access (debug)!")
+        try:
+            url = requests.get(self.source)
+        except Exception:
+            logger.info(f"Invalid url.{self.source}(info)!")
+            print('Could not fetch the URL. Input valid URL.')
+            sys.exit(1)
+        try:
+            soup = BeautifulSoup(url.content, 'xml')
+            name = soup.find().title.text
+            items = soup.find_all('item')
+            if not items:
+                raise ValueError('feed contains no <item> elements')
+        except Exception:
+            logger.info(f"Invalid url.{self.source}(info)!")
+            print('Could not read feed. Input xml-format URL.')
+            sys.exit(1)
+        return name, items
+
+    def get_title(self) -> list:
+        """Return the title text of the first ``limit`` items."""
+        logger.debug("Get title from xml (debug)!")
+        return [item.title.text for item in self.items[:max(self.limit, 0)]]
+
+    def get_pubDate(self) -> list:
+        """Return the pubDate text of the first ``limit`` items."""
+        logger.debug("Get pubDate from xml (debug)!")
+        return [item.pubDate.text for item in self.items[:max(self.limit, 0)]]
+
+    def get_link(self) -> list:
+        """Return the link text of the first ``limit`` items."""
+        logger.debug("Get link from xml (debug)!")
+        return [item.link.text for item in self.items[:max(self.limit, 0)]]
+
+    def get_description(self) -> list:
+        """Return raw descriptions and store tag-stripped copies in ``clear_description``."""
+        logger.debug("Get description from xml (debug)!")
+        raw = [item.description.text if item.description else 'No description here'
+               for item in self.items[:max(self.limit, 0)]]
+        # Rebuilt on every call (not appended to), so repeated calls cannot duplicate entries.
+        self.clear_description = [re.sub(r'\<[^>]*\>|(&rsaquo)', '', text) for text in raw]
+        return raw
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..055ff375
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+beautifulsoup4==4.11.1
+charset-normalizer==2.0.12
+idna==3.3
+lxml==4.9.0
+soupsieve==2.3.2.post1
+urllib3==1.26.9
+python-dateutil==2.8.2
+reportlab==3.6.10
+json2html==1.3.0
+loguru==0.6.0
+requests==2.27.1
\ No newline at end of file
diff --git a/rss_reader.py b/rss_reader.py
new file mode 100644
index 00000000..0addbddb
--- /dev/null
+++ b/rss_reader.py
@@ -0,0 +1,118 @@
+import argparse
+import sys
+from loguru import logger
+
+from reader import Reader
+from printer import Printer
+from converter import Converter
+
+VERSION = '4.0'  # printed by main() for the -v/--version flag
+# Log DEBUG-and-above records to debug.log; main() calls logger.remove() unless --verbose is given.
+logger.add("debug.log", format="{time} {level} {message}", level="DEBUG")
+
+
+def args() -> argparse.Namespace:
+    """Build the command-line parser, parse ``sys.argv`` and return the parsed namespace."""
+    cli = argparse.ArgumentParser(description="Choose type of the interface")
+    cli.add_argument("-v", "--version", help="Print version info", action="store_true")
+    cli.add_argument("--json", help="Print result as JSON", action="store_true")
+    cli.add_argument("--verbose", help="Outputs verbose status messages.Print logs.", action="store_true")
+    cli.add_argument("--limit", help="Limit news topics. If it's not specified, then you get all available feed",
+                     type=int)
+    cli.add_argument("--date", type=str,
+                     help="It should take a date in Ymd format.The new from the specified day will be printed out.")
+    cli.add_argument("--html", help="It convert data to HTML-format in file output.html.", action="store_true")
+    cli.add_argument("source", type=str, nargs="?", help="RSS URL")
+    return cli.parse_args()
+
+
+def date_search(data: dict, date: str) -> dict:
+    """Return the entries of ``data`` whose ``pubDate`` falls on ``date`` (``YYYYMMDD``).
+
+    Prints a message and exits with status 1 if ``date`` is malformed or nothing matches.
+    """
+    import re
+
+    if len(date) != 8 or not date.isdecimal():
+        logger.info(f"Invalid date.{date}!")
+        print(f"Invalid date.{date}. Use pattern YMD!")
+        sys.exit(1)
+    logger.debug("Start data searching (debug)!")
+    month = {"Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05", "Jun": "06", "Jul": "07", "Aug": "08",
+             "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12"}
+    # The day may be written with or without a leading zero ("1 Jul 2022" / "01 Jul 2022").
+    wanted = re.compile(rf"\b0?{int(date[6:])} {date[4:6]} {date[:4]}\b")
+    list_of_index = []
+    for i, stamp in enumerate(data["pubDate"]):
+        for abbr, number in month.items():
+            stamp = stamp.replace(abbr, number)
+        if wanted.search(stamp):  # i always refers to the same position in every data list
+            list_of_index.append(i)
+    if not list_of_index:
+        logger.info(f"No information found.{date}!")
+        print(f"No information found.{date}.")
+        sys.exit(1)
+    return {"name": data["name"],
+            "size": len(list_of_index),
+            "title": [data["title"][i] for i in list_of_index],
+            "pubDate": [data["pubDate"][i] for i in list_of_index],
+            "description": [data["description"][i] for i in list_of_index],
+            "link": [data["link"][i] for i in list_of_index]}
+
+
+def main():
+    """Run the CLI: parse the arguments once, load and optionally filter the news, then print."""
+    options = args()
+    if not options.verbose:
+        logger.remove()
+    if options.version:
+        logger.debug("Version call (debug)!")
+        print('Version:' + VERSION)
+        logger.info("Print version (info)!")
+        sys.exit()
+    elif options.date and options.source is None:
+        # No URL given: filter the data that a reader-less Converter loads via from_json().
+        converter = Converter()
+        logger.debug("Convert to json call (debug)!")
+        converter.to_JSON(date_search(converter.from_json(), options.date))
+        logger.info("Succesful data search (info)!")
+        logger.info("Save to json file (info)!")
+        print(Printer(converter.from_json()))
+    else:
+        logger.debug("Reader call (debug)!")
+        my_reader = Reader(options.source, options.limit) if options.limit else Reader(options.source)
+        converter = Converter(my_reader)
+        logger.debug("Convert to json call (debug)!")
+        converter.to_JSON()
+        logger.info("Save to json file (info)!")
+        if options.date:
+            converter.to_JSON(date_search(converter.from_json(), options.date))
+            logger.info("Succesful data search (info)!")
+        else:
+            converter.to_JSON()
+            logger.info("Save to json file (info)!")
+        logger.debug("Printer call (debug)!")
+        print(Printer(converter.from_json()))
+        logger.info("Print information (info)!")
+
+    if options.json:
+        logger.debug("Json-style call (debug)!")
+        print("Json style:\n")
+        print(converter.from_json())
+        logger.info("Print json-style information (info)!")
+
+    if options.html:
+        try:
+            converter.to_HTML()
+        except Exception:
+            logger.error("Error with HTML-file(error)!")
+            print('Error: convert to HTML-file does not work with this URL ')
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except Exception:  # last-resort boundary: record the traceback and exit non-zero
+        logger.exception("Unexpected error (error)!")
+        print("Unexpected error")
+        sys.exit(1)
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..51d89817
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,30 @@
+import os
+from setuptools import find_packages, setup
+
+
+def read(file_name):
+    """Return the text of ``file_name``, resolved relative to this script's directory."""
+    with open(os.path.join(os.path.dirname(__file__), file_name), encoding="utf-8") as file:
+        return file.read()
+
+
+setup(
+    name="rss_reader",
+    version="4.0",
+    author="Maya Voyshnis",
+    author_email="vvvoyshnism@gmail.com",
+    description='Python RSS Parser',
+    # Top-level modules are not collected by find_packages(); converter.py assumed alongside (TODO confirm).
+    py_modules=["rss_reader", "reader", "printer", "converter"],
+    packages=find_packages(),
+    install_requires=[
+        "requests",
+        "beautifulsoup4",
+        "lxml",  # backs the BeautifulSoup(..., 'xml') parser used by reader.py
+        "python-dateutil",
+        "loguru",
+    ],
+    # NOTE(review): requirements.txt also pins reportlab and json2html; add them if converter.py imports them.
+    # Entry point syntax is 'module:function'; rss_reader.py is a module, so there is no rss_reader.main.
+    entry_points={'console_scripts': ['rss_reader=rss_reader:main']},
+)
diff --git a/test.py b/test.py
new file mode 100644
index 00000000..c1b03022
--- /dev/null
+++ b/test.py
@@ -0,0 +1,62 @@
+import unittest
+
+from reader import Reader
+from converter import Converter
+from printer import Printer
+
+
+class TestReader(unittest.TestCase):
+    """Check Reader against the live onliner.by feed (requires network access)."""
+
+    def setUp(self):
+        self.reader1 = Reader("https://www.onliner.by/feed", 1)
+        self.reader2 = Reader("https://www.onliner.by/feed", 2)
+
+    def test_get_acces(self):
+        name, items = self.reader1.get_acces()
+        self.assertIsInstance(name, str)
+        self.assertGreater(len(items), 0)
+
+    def test_get_title(self):
+        self.assertEqual(len(self.reader1.get_title()), 1)
+
+    def test_get_pubDate(self):
+        self.assertEqual(len(self.reader2.get_pubDate()), 2)
+
+    def test_get_link(self):
+        self.assertEqual(len(self.reader2.get_link()), 2)
+
+    def test_get_description(self):
+        self.assertEqual(len(self.reader1.get_description()), 1)
+
+
+class TestConverter(unittest.TestCase):  # network-backed: setUp builds a Reader, which downloads a feed
+    def setUp(self):
+        self.converter1 = Converter(Reader("https://feeds.fireside.fm/bibleinayear/rss", 3))
+        self.converter2 = Converter()
+
+    def test_to_dict(self):
+        self.assertEqual(len(self.converter1.to_dict()), 6)
+
+    def test_from_json(self):
+        self.assertEqual(len(self.converter1.from_json()), 6)
+
+    def test_from_json2(self):
+        self.assertIsInstance(self.converter2.from_json(), dict)
+
+    def test_to_HTML(self):
+        self.assertEqual(self.converter1.to_HTML(), True)
+
+
+class TestPrinter(unittest.TestCase):  # fixed in-memory data: no network, exact output asserted
+    def setUp(self):
+        fields = dict(title=["t"], pubDate=["d"], description=["x"], link=["l"])
+        self.printer = Printer(dict(name="Feed", size=1, **fields))
+
+    def test_print(self):
+        self.assertEqual(str(self.printer), "Feed\nt\nd\n\nx\n\nl\n\n---------------\n\n")
+
+
+if __name__ == "__main__":  # run every test case when this file is executed directly
+ unittest.main()
+