Tests and Documentation Updates (#2)

mattbaumversium · web-flow · commit 77ef6726d390 · 2023-05-24T10:56:44.000-07:00
* Retry only on 429 and 500. Additional tests and documentation

* Update version

* Change client-server timeout padding
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ from reach import ReachClient
 ```
 2) Pass your [API Key](https://app.versium.com/account/manage-api-keys) to the ReachClient constructor.
 ```python
-client = ReachClient('path-key-012345678')
+client = ReachClient('api-key-012345678')
 ```
 3) Run the `append` method of your `ReachClient` object with the API name, input records, desired outputs (if applicable),
 and any extra config parameters you wish to pass.
@@ -81,6 +81,9 @@ Results are returned as a list of QueryResult objects, which contain the followi
         If the client errored out during a request, this stores the error object
 
 
+- **error_msg**:
+        Stores additional info about query errors.
+
 # Things to keep in mind
 - The default rate limit for Reach APIs is 20 queries per second
 - You must have a provisioned API key for this function to work. If you are unsure where to find your API key, 
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "versium-reach-sdk"
-version = "1.0.0"
+version = "1.1.0"
 authors = [
   { name="Versium Analytics, Inc.", email="opensource@versium.com" },
 ]
diff --git a/reach/append.py b/reach/append.py
@@ -14,12 +14,33 @@
 API_VERSION = "/v2/"
 
 
-async def _fetch(session, record, query_params, path, headers, attempts_left):
-    """Internal fetch method."""
+async def _fetch(session, record, query_params, path, headers):
+    """Make an HTTP request to the API
+
+    Parameters
+    ----------
+    session : aiohttp.ClientSession
+
+    record : QueryRecord
+
+    query_params : dict
+        Additional query parameters to pass to the API call
+
+    path : string
+        Full path of the Versium Reach API endpoint
+
+    headers : dict
+        Additional headers to pass with the HTTP request.
+
+    Returns
+    -------
+    QueryResult
+    """
     if query_params is None:
         query_params = {}
     row_dict = {key: value for key, value in record.data.items() if value is not None}
     idx = record.index
+    err_msg = ""
     result = QueryResult()
 
     params = {**query_params, **row_dict}
@@ -32,8 +53,9 @@ async def _fetch(session, record, query_params, path, headers, attempts_left):
             result.headers = dict(response.headers)
 
             if not result.success:
-                logger.error(f"Unsuccessful url fetch: {result.reason}\n\tIndex: {idx}\n\tURL: {API_BASE_URL + path}?{urllib.parse.urlencode(params)}"
-                             f"\n\tResponse Status: {result.http_status}\n\tAttempts Left: {attempts_left:d}")
+                err_msg = f"Unsuccessful url fetch: {result.reason}\n\tIndex: {idx}\n\tURL: {API_BASE_URL + path}?{urllib.parse.urlencode(params)}"\
+                          f"\n\tResponse Status: {result.http_status}"
+                result.error_msg = err_msg
                 return result
 
             result.body_raw = await response.read()
@@ -51,13 +73,50 @@ async def _fetch(session, record, query_params, path, headers, attempts_left):
     except aiohttp.ClientError as e:
         result.request_error = e
         status = getattr(response, "status", "UNKNOWN")
-        logger.error(f"Error during url fetch: {e.message}\n\tIndex: {idx}\n\tURL: {path}?{urllib.parse.urlencode(params)}"
-                     f"\n\tResponse Status: {status}\n\tAttempts Left: {attempts_left:d}")
+        err_msg = f"Error during url fetch: {e.message}\n\tIndex: {idx}\n\tURL: {path}?{urllib.parse.urlencode(params)}"\
+                  f"\n\tResponse Status: {status}"
+    result.error_msg = err_msg
     return result
 
 
-async def _create_tasks(api, records, query_params, headers=None, *, queries_per_second=20, n_connections=100, timeout=20, n_retry=3,
-                        retry_wait_time=3):
+async def _create_tasks(api, records, query_params, headers=None, *, n_retry=3, queries_per_second=20, n_connections=100, retry_wait_time=3,
+                        timeout=20):
+    """ Split the API calls into asynchronous tasks and wrap them in a rate limiter.
+
+        Parameters
+        ----------
+        api : string
+            Specifies the name of the Versium Reach API endpoint to query ('contact', 'demographic', 'b2conlineaudience', etc.)
+
+        records : list[QueryRecord]
+            List containing QueryRecord objects
+
+        query_params : dict
+            Additional query parameters to pass to each API call (e.g. {'cfg_max_recs': 1})
+
+        headers : dict
+            Additional header parameters to pass to the API call.
+
+        n_retry : int
+            Number of times to retry the query if it fails.
+
+        queries_per_second : int
+            Maximum number of queries to perform each second to avoid 429 errors.
+
+        n_connections : int
+            Number of simultaneous calls to make when querying.
+
+        retry_wait_time : int
+            Number of seconds to wait until retrying a failed query. The wait time is increased by a multiple of `retry_wait_time` every time
+            the query fails (e.g. 0, 3, 6, 9, 12, etc.)
+
+        timeout : float
+            Number of seconds to wait for the response before timing out.
+
+        Returns
+        -------
+        list[dict]: List of responses from the API calls. This will be in the same order as given in the input.
+        """
     tasks = []
     limit = RateLimiter(max_calls=queries_per_second,
                         period=1,
@@ -78,6 +137,42 @@ async def _create_tasks(api, records, query_params, headers=None, *, queries_per
 
 def query_api(api, records, query_params, headers=None, *, n_retry=3, queries_per_second=20, n_connections=3, retry_wait_time=3,
               timeout=3):
+    """ Query the Versium Reach API and return the results.
+
+    Parameters
+    ----------
+    api : string
+        Specifies the name of the Versium Reach API endpoint to query ('contact', 'demographic', 'b2conlineaudience', etc.)
+
+    records : list[dict]
+        List containing records as key, value pairs e.g [{'first': 'John', 'last': 'Smith'}]
+
+    query_params : dict
+        Additional query parameters to pass to each API call (e.g. {'cfg_max_recs': 1})
+
+    headers : dict
+        Additional header parameters to pass to the API call.
+
+    n_retry : int
+        Number of times to retry the query if it fails.
+
+    queries_per_second : int
+        Maximum number of queries to perform each second to avoid 429 errors.
+
+    n_connections : int
+        Number of simultaneous calls to make when querying.
+
+    retry_wait_time : int
+        Number of seconds to wait until retrying a failed query. The wait time is increased by a multiple of `retry_wait_time` every time
+        the query fails (e.g. 0, 3, 6, 9, 12, etc.)
+
+    timeout : float
+        Number of seconds to wait for the response before timing out.
+
+    Returns
+    -------
+    list[dict]: List of responses from the API calls. This will be in the same order as given in the input.
+    """
 
     if len(records) < 1:
         logger.warning("No input records were given.")
diff --git a/reach/query_data.py b/reach/query_data.py
@@ -44,10 +44,13 @@ class QueryResult:
 
     request_error: aiohttp.ClientError
         If the client errored out during a request, this stores the error object
+
+    error_msg: string
+        Additional error message
     """
 
     def __init__(self, body=None, success=False, match_found=False, *, http_status=None, reason=None, headers=None,
-                 body_raw=None, request_error=None):
+                 body_raw=None, request_error=None, error_msg=""):
         if body is None:
             body = {}
         self.body = body
@@ -59,6 +62,7 @@ def __init__(self, body=None, success=False, match_found=False, *, http_status=N
         self.body_raw = body_raw
         self.request_error = request_error
         self.reason = reason
+        self.error_msg = error_msg
 
     def __repr__(self):
         headers = str(self.headers)
diff --git a/reach/rate_limiter.py b/reach/rate_limiter.py
@@ -1,6 +1,9 @@
 import asyncio
+import logging
 import time
 
+logger = logging.getLogger(__name__)
+
 
 class RateLimiter(object):
     """ Limits the number of calls to a function within a timeframe. Also limits the number of total active function calls.
@@ -34,6 +37,17 @@ def __init__(self, *, max_calls=20, period=1, n_connections=100, n_retry=3, retr
         self.sem = asyncio.Semaphore(n_connections)
 
     def __call__(self, func):
+        """
+
+        Parameters
+        ----------
+        func : Callable
+            function that returns a QueryResult object
+
+        Returns
+        -------
+        Callable: Input function wrapped with a rate limiting functionality
+        """
 
         async def wrapper(*args, **kwargs):
             # Semaphore will block more than {self.max_connections} from happening at once.
@@ -44,16 +58,32 @@ async def wrapper(*args, **kwargs):
                         await asyncio.sleep(self.__period_remaining())
 
                     self.num_calls += 1
-                    result = await func(*args, attempts_left=self.n_retry - i, **kwargs)
-                    if not result.success and self.n_retry - i > 0:
-                        await asyncio.sleep(self.retry_wait_time * i)
+                    result = await func(*args, **kwargs)
+                    if result.success:
+                        return result
 
+                    if (result.http_status in (429, 500)) and (self.n_retry - i > 0):
+                        logger.error(result.error_msg + f"\n\tAttempts Left: {self.n_retry - i: d}")
+                        await asyncio.sleep(self.retry_wait_time * i)
+                        continue
+                    elif self.n_retry - i <= 0:
+                        logger.error(result.error_msg + f"\n\tNo attempts left.")
                     else:
-                        return result
+                        logger.error(result.error_msg + f"\n\tNot retrying for http status: {result.http_status}")
+
+                    return result
 
         return wrapper
 
     def __period_remaining(self):
+        """ Gets the amount of time remaining in the period. If there is no time remaining, resets the call counter and updates the reset
+        timer.
+
+        Returns
+        -------
+        float: Amount of time remaining in this period
+
+        """
         elapsed = self.clock() - self.last_reset
         period_remaining = self.period - elapsed
         if period_remaining <= 0:
diff --git a/reach/reach.py b/reach/reach.py
@@ -1,6 +1,8 @@
 from .append import query_api
 import logging
 
+CLIENT_SERVER_TIMEOUT_PADDING = 0.2
+
 logger = logging.getLogger(__name__)
 
 
@@ -40,6 +42,9 @@ def __init__(self, api_key, *, queries_per_second=20, n_connections=100, timeout
         self.n_retry = n_retry
         self.retry_wait_time = retry_wait_time
 
+        if timeout <= 0:
+            raise ValueError(f"`timeout` must be greater than 0! Instead got {timeout}.")
+
     def append(self, api_name, input_records, outputs=(), config_params=None):
         """Perform an append on the input records and return the results.
 
@@ -63,11 +68,13 @@ def append(self, api_name, input_records, outputs=(), config_params=None):
         -------
         list[QueryResult]: A list of QueryResult objects
         """
-        query_params = dict()
+        query_params = {"cfg_max_recs": 1}
         if config_params is None:
             config_params = dict()
 
         query_params.update(config_params)
+        # Keep the server-side timeout slightly shorter than the client timeout so the server can respond before the client gives up.
+        query_params['rcfg_max_time'] = self.timeout - min(self.timeout/2.0, CLIENT_SERVER_TIMEOUT_PADDING)
 
         query_params["output[]"] = list(set(outputs))  # remove duplicate outputs
 
diff --git a/setup.py b/setup.py
@@ -49,7 +49,7 @@ def setup_package():
 
     setup(
             name='versium-reach-sdk',
-            version='1.0.0',
+            version='1.1.0',
             description='Python SDK for querying Versium Reach APIs',
             long_description=long_description,
             url='https://github.com/VersiumAnalytics/reach-api-python-sdk',
diff --git a/tests/base.py b/tests/base.py
@@ -0,0 +1,40 @@
+import logging
+from unittest.mock import patch
+
+from aiohttp.test_utils import AioHTTPTestCase, TestServer, Application
+
+from reach import append
+from tests.utils import make_app, RequestHandler, RateChecker
+
+
+class BaseTestCase(AioHTTPTestCase):
+
+    async def get_application(self):
+        """
+        Override the get_application method to return your application.
+        """
+        self.rate_checker = RateChecker(max_calls=5, max_connections=5, min_calls=1, min_connections=1, period=1)
+        self.request_handler = RequestHandler(self.rate_checker, response_time=0)
+        return make_app(self.request_handler)
+
+    async def get_server(self, app: Application) -> TestServer:
+        """Return a TestServer instance."""
+        test_server = TestServer(app, loop=self.loop, skip_url_asserts=False)
+        return test_server
+
+    async def setUpAsync(self):
+        await super().setUpAsync()
+        patcher = patch.object(append.aiohttp, 'ClientSession', autospec=True)
+        self.addCleanup(patcher.stop)
+        self.ClientSession = patcher.start()
+        self.ClientSession.return_value = self.client
+
+        # Silence aiohttp logs
+        aiohttp_logs = ['aiohttp.access',
+                        'aiohttp.client',
+                        'aiohttp.internal',
+                        'aiohttp.server',
+                        'aiohttp.web',
+                        'aiohttp.websocket']
+        for log_name in aiohttp_logs:
+            logging.getLogger(log_name).handlers = []
diff --git a/tests/test_append.py b/tests/test_append.py
diff --git a/tests/test_reach_client.py b/tests/test_reach_client.py
diff --git a/tests/utils.py b/tests/utils.py

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"`
`4`	`4`
`5`	`5`	`[project]`
`6`	`6`	`name = "versium-reach-sdk"`
`7`		`-version = "1.0.0"`
	`7`	`+version = "1.1.0"`
`8`	`8`	`authors = [`
`9`	`9`	`{ name="Versium Analytics, Inc.", email="opensource@versium.com" },`
`10`	`10`	`]`