-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
84 lines (72 loc) · 2.94 KB
/
app.py
File metadata and controls
84 lines (72 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium_stealth import stealth
import time
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Wrap the app with Flask-CORS using its default settings.
# NOTE(review): presumably needed so the browser extension can call this API
# from another origin - confirm, and consider restricting allowed origins.
CORS(app)
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route('/process', methods=['POST'])
def process():
    """Scrape product details and reviews from Amazon for a posted ASIN.

    Reads the JSON request body and looks up its ``asin`` key, then loads
    the product page and review pages 1-4 in headless Chrome and collects
    the text of the title, rating and price elements plus the text of
    every review element found.

    Returns:
        A JSON response with the keys ``asin``, ``product_title``,
        ``rating``, ``cost`` and ``reviews``, or
        ``{'error': 'ASIN not provided'}`` when the body carries no usable
        ``asin`` value.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the title,
            rating or price element is missing from the product page.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    from urllib.parse import quote

    # The route is registered for POST only, so no method check is needed.
    data = request.get_json()  # JSON payload sent by the extension
    # A body that is not a JSON object has no 'asin' key; treat it as missing
    # instead of failing on .get().
    asin = data.get('asin') if isinstance(data, dict) else None
    if not asin:
        return jsonify({'error': 'ASIN not provided'})

    # Percent-encode the client-supplied value so it cannot add path segments
    # or query parameters to the URLs built below.
    asin_url = quote(str(asin), safe='')

    # NOTE: machine-specific chromedriver location; change it to the path of
    # the downloaded webdriver.
    service = Service(executable_path='/Users/luke/Documents/Projects/first-chrome-extension/chromedriver')
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # keep the browser window from popping up
    driver = webdriver.Chrome(service=service, options=options)

    try:
        # selenium-stealth is applied here to get past bot detection.
        stealth(
            driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
        )

        # Product page: title, rating and price text.
        driver.get('https://www.amazon.com/dp/' + asin_url)
        titlestr = driver.find_element(By.ID, 'title').text
        ratingstr = driver.find_element(By.ID, 'acrPopover').text
        coststr = driver.find_element(By.CLASS_NAME, 'a-price-whole').text

        # Walk the review pages and gather the text of every review found.
        pages = 4
        revlist = []
        for page in range(1, pages + 1):
            driver.get(
                'https://www.amazon.com/product-reviews/' + asin_url
                + '?pageNumber=' + str(page)
            )
            revlist.extend(
                review.text
                for review in driver.find_elements(By.CLASS_NAME, 'review-text-content')
            )
    finally:
        # Always shut the browser down, even when a lookup above raises, so a
        # failed request does not leave a Chrome process running.
        driver.quit()

    return jsonify({
        'asin': asin,
        'product_title': titlestr,
        'rating': ratingstr,
        'cost': coststr,
        'reviews': revlist,
    })
# Script entry point: nothing is started when this module is merely imported.
if __name__ == '__main__':
    # Run the Flask app with debug mode switched on.
    app.run(debug=True)