This repository was archived by the owner on Aug 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy patheo_net.py
More file actions
240 lines (194 loc) · 9.18 KB
/
eo_net.py
File metadata and controls
240 lines (194 loc) · 9.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
from lxml import html
import logging
import random
import requests
import time
# --- Request pacing and retry policy -------------------------------------------------------
#
# Rate limiting (best practice): never hit the server at the maximum possible rate.
# Smallest allowed gap between two consecutive requests, in seconds (float).
MIN_REQUEST_INTERVAL = 0.75

# Exponential backoff (best practice): when retrying, grow the wait exponentially so that a
# server which is down, or struggling to come back up, is not stampeded by retrying clients.
# Wait before the first retry, in seconds (float); each further retry doubles the wait.
INITIAL_RETRY_DELAY = 4.0

# Retry limits (best practice): do not keep hammering a downed server. A missed update is
# simply picked up by the next scheduled update, so only a few retries are needed.
# Number of retry attempts made after the initial request.
NUM_RETRIES = 4

# Jitter (best practice): do not retry or repeat requests at fixed times or delays; vary
# them slightly so clients desynchronize.
# Amount of variation as a fraction (float): 0.20 == +/- 20%.
JITTER_FACTOR = 0.20
# EO_Net reports problems through the "eo.EO_Net" logger. Configure logging in your main
# program before instantiating the class; with no logger configured, an error is issued like:
#     No handlers could be found for logger "eo.EO_Net"
class EO_Net(object):
    """Network functions for the API.

    Calls are made against the session stored in self.session (set it with set_session()).
    Calls are rate limited and include retries with jitter, limits, and exponential backoff.
    """

    # HTTP methods execute_request() knows how to send.
    _SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")

    def __init__(self):
        self.logger = logging.getLogger(".".join(["eo", self.__class__.__name__]))
        self.session = None
        # Timestamp (as returned by _now()) of the most recent request attempt.
        # 0 means "no request has been made yet".
        self.last_request_time = 0

    @staticmethod
    def _now():
        """Return a timestamp in seconds that is suitable for measuring intervals.

        Uses time.monotonic() where available and falls back to time.time() otherwise.
        time.clock() is deliberately not used: it was removed in Python 3.8 and measured
        processor time rather than elapsed time on Unix.
        """
        return getattr(time, "monotonic", time.time)()

    def get_session(self):
        """Return the session used for requests, or None if none has been set."""
        return self.session

    def set_session(self, session):
        """Store the session that all subsequent requests are made against."""
        self.session = session

    def request_authenticity_token(self, url):
        """Request, parse, and return the authenticity token needed to post to the given URL.

        Args:
            url: the page containing an <input name="authenticity_token"> element.
        Returns:
            The token string, or "" if the page could not be read or holds no token.
        """
        # Request the page with the token. Throttling happens inside execute_request().
        response = self.request_with_retries(url)
        # Compare against None explicitly: a requests.Response is falsy for 4xx/5xx statuses,
        # which would hide the status code and body from the log message below.
        if response is None:
            self.logger.error("unable to read {0}.".format(url))
            return ""
        if response.status_code != requests.codes.ok:
            self.logger.error("unable to read: {0}. Status: {1}, response: {2}".
                              format(url, response.status_code, response.text))
            return ""

        # Parse out the token.
        authenticity_token = ""
        try:
            tree = html.fromstring(response.content)
            authenticity_token = tree.xpath("string(//input[@name='authenticity_token']/@value)")
        except Exception as e:
            self.logger.error("problem parsing authenticity token: " + str(e))
        if not authenticity_token:
            self.logger.error("no authenticity token found at {0}.".format(url))
        return authenticity_token

    def post_with_authenticity(self, url, payload):
        """Post to the given URL, first obtaining an authenticity token and adding it to the
        payload.

        The caller's payload is not modified; the token is added to a copy.

        Args:
            url: the target URL; also the page the token is read from.
            payload: the key/values to post (a dict, or None for no extra values).
        Returns:
            The server's response or None.
        """
        authenticity_token = self.request_authenticity_token(url)
        if not authenticity_token:
            return None
        # Copy so the caller's dict is not mutated as a side effect.
        payload = dict(payload or {})
        payload["authenticity_token"] = authenticity_token
        return self.post_payload(url, payload)

    def post_payload(self, url, payload):
        """Post the given payload to the given URL.

        Args:
            url: the target URL
            payload: the key/values to post.
        Returns:
            The server's response if its status is OK, otherwise None.
        """
        response = self.request_with_retries(url, method="POST", params=payload)
        # "is None" rather than truthiness: an error response is falsy but still carries
        # a status code and body worth logging.
        if response is None:
            self.logger.error("unable to post to {0}.".format(url))
            return None
        if response.status_code != requests.codes.ok:
            self.logger.error("unable to post to {0}. Status: {1}, response: {2}".
                              format(url, response.status_code, response.text))
            return None
        return response

    def check_request_rate(self):
        """Are we making requests too fast? If so, pause.

        Specifically, check the current time against the last request time. If
        less than MIN_REQUEST_INTERVAL has elapsed, sleep the remaining time.

        TODO: This function pauses the whole program. Improvement: create a
        request queue that handles requests asynchronously.
        """
        if not self.last_request_time:
            # No request has been made yet, so there is nothing to space out.
            return
        interval = self._now() - self.last_request_time
        if interval < MIN_REQUEST_INTERVAL:
            # Clamp the pause to MIN_REQUEST_INTERVAL in case the fallback wall clock
            # stepped backwards and produced a negative interval.
            time.sleep(min(MIN_REQUEST_INTERVAL - interval, MIN_REQUEST_INTERVAL))

    def execute_request(self, url, params=None, method="GET"):
        """Request the given URL with the given method and parameters.

        The call is throttled by check_request_rate(), and the time of the attempt is
        recorded in self.last_request_time whether or not it succeeds.

        Args:
            url: The URL to call.
            params: The optional parameters. Only sent for GET and POST.
            method: The HTTP request type {GET, POST, PUT, DELETE}.
        Returns:
            The server response, or None if the method is unknown or the request raised.
        """
        if method not in self._SUPPORTED_METHODS:
            self.logger.error("unknown request type: {0}".format(method))
            return None

        self.check_request_rate()
        try:
            if method == "GET":
                return self.session.get(url, params=params)
            if method == "POST":
                # NOTE(review): the payload travels as URL query parameters (params=),
                # not as a form body (data=). Left unchanged because the server contract
                # is not visible from this file -- confirm this is intended.
                return self.session.post(url, params=params)
            if method == "PUT":
                return self.session.put(url)
            return self.session.delete(url)
        except Exception as e:
            # Best effort by design: any failure is logged and reported as None so that
            # request_with_retries() can decide whether to try again.
            self.logger.error("problem making HTTP request: {0}".format(e))
            return None
        finally:
            # Give check_request_rate() a reference point for the next request.
            self.last_request_time = self._now()

    def request_with_retries(self, url, params=None, method="GET"):
        """Call the given request, returning the response or None if error.

        Retry the request up to NUM_RETRIES times if:
        1) execute_request() returns None, which indicates a problem caught from the request
           library, such as a network connectivity issue.
        OR
        2) the server returns a 50X response code. Note that 30X and 40X responses are not
           errors that could benefit from retries, so are returned immediately.

        A missing session or an unsupported method can never succeed, so both return None
        immediately without any retries.

        Args:
            url: The URL to call.
            params: The optional parameters.
            method: The HTTP request type {GET, POST, PUT, DELETE}.
        Returns:
            The server response or None.
        """
        if self.session is None:
            self.logger.error("no session set; unable to request URL '{0}'.".format(url))
            return None
        if method not in self._SUPPORTED_METHODS:
            self.logger.error("unknown request type: {0}".format(method))
            return None

        delay = INITIAL_RETRY_DELAY
        for retries in range(NUM_RETRIES + 1):
            response = self.execute_request(url, params=params, method=method)
            # Explicit None check: a requests.Response evaluates as False for any 4xx/5xx
            # status, so a truthiness test would retry 40X responses and never reach the
            # 50X logging below.
            if response is not None:
                if response.status_code < 500:
                    return response
                self.logger.error("from API server. Response: {0} {1}.".
                                  format(response.status_code, response.reason))
            if retries == NUM_RETRIES:
                break

            # Jitter: avoid hitting servers at fixed times or with fixed delays. Instead,
            # prevent client synchronization and server overloads by varying access times.
            jittered_delay = self.jitter(delay, JITTER_FACTOR)
            # retries + 1: Use natural numbers for readability.
            self.logger.error(
                "failed request {0} of {1} to URL '{2}'. Retrying in {3:.1f} seconds.".format(
                    retries + 1, NUM_RETRIES + 1, url, jittered_delay))
            # Exponential backoff: Double the delay between each retry, or equivalently,
            #     delay = INITIAL_RETRY_DELAY * 2 ** retries
            # The constant, 2 in this case, or doubling each delay, doesn't matter so long as
            # the delay increases significantly with each retry, allowing congestion at the
            # server to disperse.
            delay *= 2
            time.sleep(jittered_delay)

        self.logger.error("maximum HTTP request attempts ({0}) exceeded to URL '{1}'.".format(
            NUM_RETRIES + 1, url))
        return None

    def make_request(self, url, params=None, method="GET", parse_json=False):
        """Create and make the given request, returning the result as JSON if requested.

        Args:
            url: The URL to call.
            params: The optional parameters.
            method: The HTTP request type {GET, POST, PUT, DELETE}.
            parse_json: If True, return the decoded JSON body instead of the response.
        Returns:
            The response (or its decoded JSON body), or None on error, including HTTP
            errors (any status outside 2XX) and undecodable JSON.
        """
        response = self.request_with_retries(url, params=params, method=method)
        if response is None:
            return None
        if response.status_code < 200 or response.status_code >= 300:
            self.logger.error("sent {0} to url {1} with parameters {2}. Response: {3} {4}".
                              format(method, url, params, response.status_code, response.reason))
            return None
        if not parse_json:
            return response
        try:
            return response.json()
        except ValueError as e:
            # JSON decoding errors are ValueError subclasses; anything else is a real bug
            # and should not be silently swallowed.
            self.logger.error("unable to parse JSON: {0}".format(e))
            return None

    def jitter(self, interval, factor):
        """Return the interval +/- a randomized amount of the interval.

        Example:
            To jitter t by 20%: t = jitter(t, 0.20)
            Ie, if t = 10.0, the resulting t will be in the range [8.0, 12.0].
        Args:
            interval: a time period to be jittered.
            factor: the portion of interval to include in the randomization, expressed as a
                float between 0.0 and 1.0.
        Returns:
            A float between interval * (1 - factor) and interval * (1 + factor).
        """
        # random.random() is in [0.0, 1.0), so (2 * r - 1) is in [-1.0, 1.0).
        return interval + interval * factor * (2.0 * random.random() - 1.0)