From 9ae97768b67c7718c57e8b762e9908c8efe2aecf Mon Sep 17 00:00:00 2001
From: superpoussin22
Date: Wed, 12 Jun 2019 10:24:47 +0200
Subject: [PATCH 1/2] add proxy settings

---
 play_scraper/settings.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/play_scraper/settings.py b/play_scraper/settings.py
index 3f7fb15..3ac83b1 100644
--- a/play_scraper/settings.py
+++ b/play_scraper/settings.py
@@ -37,3 +37,15 @@
 
 # Regex to find page tokens within scrip tags
 TOKEN_RE = r'GAEiA[\w=]{3,7}:S:ANO1lj[\w]{5}'
+
+#========UPSTREAM PROXY SETTINGS ==============
+# If you are behind a Proxy
+UPSTREAM_PROXY_ENABLED = False
+UPSTREAM_PROXY_SSL_VERIFY = True
+UPSTREAM_PROXY_TYPE = "http"
+UPSTREAM_PROXY_IP = "127.0.0.1"
+UPSTREAM_PROXY_PORT = 3128
+UPSTREAM_PROXY_USERNAME = ""
+UPSTREAM_PROXY_PASSWORD = ""
+ #==============================================
+

From 4e6aeb0aa15a7c68353fb46a45cf3d6171f5c9f8 Mon Sep 17 00:00:00 2001
From: superpoussin22
Date: Wed, 12 Jun 2019 10:32:00 +0200
Subject: [PATCH 2/2] add proxy support

---
 play_scraper/utils.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/play_scraper/utils.py b/play_scraper/utils.py
index a320783..25c4738 100644
--- a/play_scraper/utils.py
+++ b/play_scraper/utils.py
@@ -89,6 +89,26 @@ def build_collection_url(category='', collection=''):
 
     return url
 
+def upstream_proxy(flaw_type):
+    """Set upstream Proxy if needed"""
+    if s.UPSTREAM_PROXY_ENABLED:
+        if not s.UPSTREAM_PROXY_USERNAME:
+            proxy_port = str(s.UPSTREAM_PROXY_PORT)
+            proxy_host = s.UPSTREAM_PROXY_TYPE + '://' + \
+                s.UPSTREAM_PROXY_IP + ':' + proxy_port
+            proxies = {flaw_type: proxy_host}
+        else:
+            proxy_port = str(s.UPSTREAM_PROXY_PORT)
+            proxy_host = s.UPSTREAM_PROXY_TYPE + '://' + s.UPSTREAM_PROXY_USERNAME + \
+                ':' + s.UPSTREAM_PROXY_PASSWORD + "@" + \
+                s.UPSTREAM_PROXY_IP + ':' + proxy_port
+            proxies = {flaw_type: proxy_host}
+    else:
+        proxies = {flaw_type: None}
+    verify = bool(s.UPSTREAM_PROXY_SSL_VERIFY)
+    return proxies, verify
+
+
 def send_request(method, url, data=None, params=None, headers=None,
                  timeout=30, verify=True, allow_redirects=False):
     """Sends a request to the url and returns the response.
@@ -108,6 +128,7 @@ def send_request(method, url, data=None, params=None, headers=None,
         data = generate_post_data()
 
     try:
+        proxies, verify = upstream_proxy('https')
         response = requests.request(
             method=method,
             url=url,
@@ -115,6 +136,7 @@ def send_request(method, url, data=None, params=None, headers=None,
             params=params,
             headers=headers,
             timeout=timeout,
+            proxies=proxies,
             verify=verify,
             allow_redirects=allow_redirects)
         if not response.status_code == requests.codes.ok: