-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcrawling.py
More file actions
36 lines (30 loc) · 989 Bytes
/
crawling.py
File metadata and controls
36 lines (30 loc) · 989 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from time import sleep
from modules.util import jsonify, time_gap, save_json
from modules.crawl import reviewAPI
# Constant Setting
REVIEW_UUID = "0f101f8c-ec09-39c4-9be0-2f9cc464d332"  # uuid sent with every request
PAGE_COUNT = 20  # page_count sent with every request
OUTPUT_PATH = "./database/venom_review.json"  # where the merged result is written

# Pagination state, updated after every response
HAS_NEXT_PAGE = True
NEXT_CURSOR = ""  # an empty cursor is what the very first request sends
crawled_reviews = {}

# Crawling: request page after page, following the cursor reported in each
# response's "pageInfo", until the API reports that no further page exists.
while HAS_NEXT_PAGE:
    # API Requests
    next_reviews = jsonify(
        reviewAPI().get(uuid=REVIEW_UUID, cursor=NEXT_CURSOR, page_count=PAGE_COUNT).text
    )

    # Save as Dictionary
    # The first response becomes the accumulator as-is; later responses only
    # contribute their "reviews" entries.
    # NOTE(review): every other top-level key (including "pageInfo") therefore
    # keeps the first page's values in the saved file -- confirm this is wanted.
    if not crawled_reviews:
        crawled_reviews = next_reviews
    else:
        crawled_reviews["reviews"].extend(next_reviews["reviews"])

    # Next Request Conditions
    page_info = next_reviews["pageInfo"]
    HAS_NEXT_PAGE = page_info["hasNextPage"]
    if not HAS_NEXT_PAGE:
        NEXT_CURSOR = ""
        break  # last page reached: skip the delay and go straight to saving

    end_cursor = page_info["endCursor"] if "endCursor" in page_info else ""
    if not end_cursor or end_cursor == NEXT_CURSOR:
        # Without a new cursor the next request would be identical to one that
        # was already sent, so the loop would never end and would keep
        # appending reviews. Stop and keep what has been collected.
        print("Warning: hasNextPage is set but no new endCursor was returned; stopping crawl.")
        break
    NEXT_CURSOR = end_cursor

    # Time Delay
    # presumably a randomized pause centred on `mu` seconds -- verify against
    # modules.util.time_gap
    sleep(time_gap(mu=4))

# Save Dictionary to JSON files
save_json(OUTPUT_PATH, crawled_reviews)