-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTixScraper.py
More file actions
46 lines (37 loc) · 1.22 KB
/
TixScraper.py
File metadata and controls
46 lines (37 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import atexit
import json
import os
from datetime import datetime, timedelta
from pprint import pprint
from urllib import request

from apscheduler.schedulers.background import BackgroundScheduler
def schedule_scrape_job():
    """
    Start a background scheduler that periodically scrapes tix.

    The first run is at 3:00 AM on the day after this function is called;
    subsequent runs follow at the trigger's interval (currently 30 days).
    The scheduler is shut down when the interpreter exits.

    NOTE(review): this docstring used to say "every day", but the trigger
    interval is ``days=30`` -- confirm which of the two is intended.
    """
    cron = BackgroundScheduler(daemon=True)
    cron.start()  # explicitly start cron jobs

    # Add a timedelta instead of bumping the ``day`` field directly:
    # ``replace(day=day + 1)`` raises ValueError on the last day of a month.
    # Minutes/seconds are zeroed so the first run is at 3:00 sharp rather
    # than at 3:<whatever minute this function happened to be called>.
    first_run = (datetime.now() + timedelta(days=1)).replace(
        hour=3, minute=0, second=0, microsecond=0
    )

    def parse_item(item):
        """
        Parse item and send data to db

        :param item: container for data from tix
        """
        # TODO: implement
        pprint(item)

    @cron.scheduled_job("interval", days=30, start_date=first_run)
    def scrape():
        """
        Scrape tix to get upcoming events

        The URL is read from the ``TIX_URL`` environment variable and the
        response body is decoded as JSON; each element of the decoded value
        is passed to ``parse_item``.
        """
        url = os.getenv("TIX_URL", "")
        # the context manager closes the HTTP response even if reading fails
        with request.urlopen(url) as res:
            # use the charset announced by the server, defaulting to utf-8
            encoding = res.info().get_content_charset('utf-8')
            payload = res.read().decode(encoding)

        data = json.loads(payload)
        for item in data:
            # TODO: take information from item and place in db
            parse_item(item)

    # hook scheduler shutdown to program exit so its thread is not left behind
    atexit.register(lambda: cron.shutdown(wait=False))