diff --git a/requirements.txt b/requirements.txt index 52f81de..7dae38c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ bs4 html5lib requests -selenium +selenium ~= 4.10.0 webdriver-manager pyotp diff --git a/src/amazon_client/amazon_selenium_client.py b/src/amazon_client/amazon_selenium_client.py index 90af028..50a8f51 100644 --- a/src/amazon_client/amazon_selenium_client.py +++ b/src/amazon_client/amazon_selenium_client.py @@ -4,10 +4,15 @@ from selenium import webdriver from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.chrome.options import Options as ChromeOptions +from selenium.webdriver.chrome.service import Service as ChromeService +from selenium.common.exceptions import NoSuchElementException +from selenium.webdriver.common.by import By from amazon_client.amazon_client import AmazonClient import platform ORDERS_PAGE = "https://www.amazon.com/gp/css/summary/print.html/ref=ppx_yo_dt_b_invoice_o00?ie=UTF8&orderID={}" +DIGITAL_ORDERS_PAGE = "https://www.amazon.com/gp/digital/your-account/order-summary.html?ie=UTF8&orderID={}&print=1&ref_=ppx_yo_dt_b_dpi_o00" +TRANSACTIONS_PAGE = "https://www.amazon.com/cpe/yourpayments/transactions" class AmazonSeleniumClient(AmazonClient): def __init__(self, userEmail, userPassword, otpSecret): @@ -24,18 +29,23 @@ def __init__(self, userEmail, userPassword, otpSecret): print(f"Attempting to initialize Chrome Selenium Webdriver on platform {platformMachine}...") options = ChromeOptions() options.add_argument('--headless') - self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options) + self.driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) print("Successfully initialized Chrome Selenium Webdriver") self.signIn() def getAllOrderIDs(self, pages=1): orderPage = "https://www.amazon.com/gp/your-account/order-history/ref=ppx_yo_dt_b_pagination_1_2?ie=UTF8&orderFilter=months-6&search=&startIndex={}" + digitalOrderPage = "https://www.amazon.com/gp/your-account/order-history/ref=ppx_yo_dt_b_pagination_1_2?ie=UTF8&orderFilter=months-6&search=&startIndex={}&unifiedOrders=0" orderIDs = [] for pageNumber in range(pages): self.driver.get(orderPage.format(pageNumber * 10)) soup = BeautifulSoup(self.driver.page_source, 'html.parser') orderIDs.extend([i.getText() for i in soup.find_all("bdi")]) + + self.driver.get(digitalOrderPage.format(pageNumber * 10)) + soup = BeautifulSoup(self.driver.page_source, 'html.parser') + orderIDs.extend([i.getText() for i in soup.find_all("bdi")]) return orderIDs def doSignIn(self): @@ -44,40 +54,51 @@ def doSignIn(self): self.driver.get("https://amazon.com") time.sleep(1) - accountNav = self.driver.find_element_by_xpath("//a[@data-nav-role ='signin']") + accountNav = self.driver.find_element(By.XPATH, "//a[@data-nav-role ='signin']") accountNav.click() time.sleep(1) - emailEntry = self.driver.find_element_by_id("ap_email") + emailEntry = self.driver.find_element(By.ID, "ap_email") emailEntry.clear() emailEntry.send_keys(self.userEmail) - self.driver.find_element_by_id("continue").click() + self.driver.find_element(By.ID, "continue").click() time.sleep(1) - passwordEntry =self.driver.find_element_by_id("ap_password") + passwordEntry =self.driver.find_element(By.ID, "ap_password") passwordEntry.clear() passwordEntry.send_keys(self.userPassword) - self.driver.find_element_by_name("rememberMe").click() - self.driver.find_element_by_id("signInSubmit").click() + self.driver.find_element(By.NAME, "rememberMe").click() + self.driver.find_element(By.ID, "signInSubmit").click() time.sleep(1) - totpSelect = self.driver.find_element_by_xpath("//input[contains(@value,'TOTP')]") - totpSelect.click() + try: + totpSelect = self.driver.find_element(By.XPATH, "//input[contains(@value,'TOTP')]") + totpSelect.click() - sendCode = self.driver.find_element_by_xpath("//input[@id = 'auth-send-code']") - sendCode.click() + sendCode = self.driver.find_element(By.XPATH, "//input[@id = 'auth-send-code']") + sendCode.click() - time.sleep(1) + time.sleep(1) + except NoSuchElementException: + pass - otpEntry = self.driver.find_element_by_id("auth-mfa-otpcode") + otpEntry = self.driver.find_element(By.ID, "auth-mfa-otpcode") otpEntry.clear() otpEntry.send_keys(totp.now()) - self.driver.find_element_by_id("auth-mfa-remember-device").click() - self.driver.find_element_by_id("auth-signin-button").click() + self.driver.find_element(By.ID, "auth-mfa-remember-device").click() + self.driver.find_element(By.ID, "auth-signin-button").click() time.sleep(1) + try: + phoneSkipButton = self.driver.find_element(By.XPATH, '//*[@id="ap-account-fixup-phone-skip-link"]') + phoneSkipButton.click() + time.sleep(1) + except NoSuchElementException: + pass + + def signIn(self): try: self.doSignIn() @@ -90,12 +111,30 @@ def signIn(self): def interpretDriverErrorPage(self): try: - failElem = self.driver.find_element_by_xpath("//*[contains(text(),'not a robot')]") + failElem = self.driver.find_element(By.XPATH, "//*[contains(text(),'not a robot')]") print("Blocked by Amazon anti-robot. Circumnavigating this is unsupported. Please try again later.") except: pass def getInvoicePage(self, orderID): - myOrderPage = ORDERS_PAGE.format(orderID) + if(orderID[0] == 'D'): + myOrderPage = DIGITAL_ORDERS_PAGE.format(orderID) + else: + myOrderPage = ORDERS_PAGE.format(orderID) self.driver.get(myOrderPage) return self.driver.page_source + + def getTransactionsPage(self, pages=3): + page_sources = [] + + self.driver.get(TRANSACTIONS_PAGE) + for i in range(pages): + page_sources.append(self.driver.page_source) + try: + nextPageButton = self.driver.find_element(By.XPATH, '// *[ @ id = "a-autoid-1"] / span / input') + except NoSuchElementException: + nextPageButton = self.driver.find_element(By.XPATH, '// *[ @ id = "cpefront-mpo-widget"] / div / form / div[2] / div[2] / span / span / input') + nextPageButton.click() + time.sleep(2) + + return page_sources \ No newline at end of file