Skip to content
Merged

V35 #59

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion placekey/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.34'
__version__ = '0.0.35'
95 changes: 95 additions & 0 deletions placekey/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from json import JSONDecodeError

import backoff
import pandas as pd
import requests
from typing import Set, Dict
from ratelimit import limits, RateLimitException
from .general import _post_request_function

Expand Down Expand Up @@ -88,6 +90,26 @@ class PlacekeyAPI:

DEFAULT_QUERY_ID_PREFIX = "place_"

# Each entry is one combination of query fields that is sufficient on its own:
# _has_minimum_inputs accepts a query when every field of at least one entry
# is present.
MIN_INPUTS = [
    ['latitude', 'longitude'],
    ['street_address', 'city', 'region', 'postal_code'],
    ['street_address', 'region', 'postal_code'],
    ['street_address', 'region', 'city'],
]

# Join keys that _join_pandas_df is willing to obtain through a Placekey lookup
# when the column is missing from a DataFrame. Each name is listed once.
PLACEKEY_OUTPUTS = {
    "placekey",
    "address_placekey",
    "building_placekey",
    "gers",
    "confidence_score",
    "upi",
    "geoid",
    "parcel",
    "address_confidence_score",
}

def __init__(self, api_key=None, max_retries=DEFAULT_MAX_RETRIES, logger=log,
user_agent_comment=None):
self.api_key = api_key
Expand Down Expand Up @@ -120,6 +142,79 @@ def __init__(self, api_key=None, max_retries=DEFAULT_MAX_RETRIES, logger=log,
calls=self.BULK_REQUEST_LIMIT,
period=self.BULK_REQUEST_WINDOW,
max_tries=self.max_retries)

def _has_minimum_inputs(self, user_inputs: Set[str]) -> bool:
    """
    Check whether the supplied field names satisfy at least one minimum input combination.

    :param user_inputs: Collection of query field names supplied by the caller
        (anything supporting ``in``, e.g. a set or ``dict.keys()``).
    :return: True if every field of at least one entry of MIN_INPUTS is present
        in user_inputs, False otherwise.
    """
    return any(
        all(key in user_inputs for key in required)
        for required in self.MIN_INPUTS
    )

def _join_pandas_df(self, df1: pd.DataFrame, column_mapping_1: Dict, df2: pd.DataFrame, column_mapping_2: Dict, how: str = 'inner', on: str = "placekey", fields=None, batch_size=MAX_BATCH_SIZE, verbose=False):
    """
    Merge two DataFrames on a join key, looking the key up through the Placekey API for
    whichever DataFrame does not already have it as a column.

    :param df1: The first DataFrame (pd.DataFrame).
    :param column_mapping_1: Column mapping for df1, passed to _placekey_pandas_df when df1 lacks the join key.
    :param df2: The second DataFrame (pd.DataFrame).
    :param column_mapping_2: Column mapping for df2, passed to _placekey_pandas_df when df2 lacks the join key.
    :param how: Merge type handed to pd.merge. Defaults to 'inner'.
    :param on: Name of the column to join on. Defaults to "placekey".
    :param fields: Optional sequence of extra fields to request during lookup; the join key is
        added to it when it is not already listed.
    :param batch_size: Passed through to _placekey_pandas_df.
    :param verbose: Passed through to _placekey_pandas_df.
    :return: pd.DataFrame produced by pd.merge(df1, df2, how=how, on=on).
    :raises ValueError: If a DataFrame lacks the join key and the key is not one of PLACEKEY_OUTPUTS.
    """
    # Build a fresh list so tuples are accepted, and request the join key only once
    # even when the caller already listed it.
    # NOTE(review): with the default on="placekey" this asks for "placekey" as a field, while
    # _placekey_pandas_df documents fields as parameters "other than placekey" - confirm
    # lookup_placekeys accepts it.
    fields = [] if fields is None else list(fields)
    if on not in fields:
        fields.append(on)

    def ensure_join_key(df, column_mapping, ordinal):
        # Return df unchanged when it already has the join column; otherwise obtain the
        # column via a lookup, which is only possible for Placekey output names.
        if on in df:
            return df
        if on not in self.PLACEKEY_OUTPUTS:
            raise ValueError("The {} dataset does not contain the join key {}".format(ordinal, on))
        return self._placekey_pandas_df(df, column_mapping=column_mapping, fields=fields, batch_size=batch_size, verbose=verbose, return_original_values=True)

    df1 = ensure_join_key(df1, column_mapping_1, "first")
    df2 = ensure_join_key(df2, column_mapping_2, "second")

    return pd.merge(df1, df2, how=how, on=on)


def _placekey_pandas_df(self, df: pd.DataFrame, column_mapping: Dict, fields=None, batch_size=MAX_BATCH_SIZE, verbose=False, return_original_values=True):
    """
    Look up Placekeys for every row of a DataFrame and return the results as a DataFrame.

    The input DataFrame is not modified.

    :param df: The input DataFrame (pd.DataFrame).
    :param column_mapping: Dict mapping Placekey query parameter names to the names of the
        columns in df that hold their values. Mapped columns that are absent from df, and
        cells that are null, are left out of the query for that row.
    :param fields: A list of requested parameters other than placekey. For example: address_placekey, building_placekey
        Defaults to None
    :param batch_size: Integer for the number of places to lookup in a single batch.
        Defaults to MAX_BATCH_SIZE; passed through to lookup_placekeys.
    :param verbose: Boolean for whether or not to log additional information.
        Defaults to False
    :param return_original_values: If True (default), return the original columns merged with
        the lookup results. If False, return only the lookup results, with each row's query id
        in a 'temp_query_id' column.
    :return: pd.DataFrame with new columns for the placekey outputs.
    :raises ValueError: If column_mapping fails _validate_query or does not cover a minimum
        input combination.
    """
    if not self._validate_query(column_mapping):
        raise ValueError(
            "Some queries contain keys other than: {}".format(self.QUERY_PARAMETERS))

    if not self._has_minimum_inputs(column_mapping.keys()):
        raise ValueError(
            "The inputted DataFrame doesn't have enough information. Refer to minimum inputs documentation here: https://docs.placekey.io/documentation/placekey-api/input-parameters/minimum-inputs")

    temp_query_id = 'temp_query_id'
    # Query ids are derived from the index labels, so rows that share a label share an id.
    query_ids = [self.DEFAULT_QUERY_ID_PREFIX + str(label) for label in df.index]
    # assign() returns a new frame, so the caller's DataFrame never gains the helper column.
    # The string column is added before iterating on purpose: it keeps iterrows() from
    # upcasting mixed numeric rows (e.g. ints to floats) before values are sent out.
    keyed_df = df.assign(**{temp_query_id: query_ids})

    # Only mapped columns that actually exist can contribute values; resolve that once.
    available = [
        (place_key, column_name)
        for place_key, column_name in column_mapping.items()
        if column_name in keyed_df.columns
    ]

    places = []
    for (_, row), query_id in zip(keyed_df.iterrows(), query_ids):
        place = {
            place_key: row[column_name]
            for place_key, column_name in available
            if pd.notna(row[column_name])
        }
        place['query_id'] = query_id
        places.append(place)

    result = self.lookup_placekeys(places=places, fields=fields, batch_size=batch_size, verbose=verbose)
    result_df = pd.DataFrame(result).rename(columns={"query_id": temp_query_id})

    if not return_original_values:
        return result_df
    # Re-attach the original columns via the helper id, then discard the helper column.
    merged_df = pd.merge(keyed_df, result_df, how='inner', on=temp_query_id).drop([temp_query_id], axis=1)
    return merged_df


def lookup_placekey(self,
fields=None,
Expand Down
27 changes: 27 additions & 0 deletions placekey/placekey.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import re
import json
from typing import List
from math import asin, cos, radians, sqrt
import ast

Expand Down Expand Up @@ -97,6 +98,32 @@ def return_free_datasets_location_by_name(name: str, url: bool = False):
raise ValueError(response.reason)
else:
raise Exception("Something went wrong. Please contact Placekey.")

def return_free_dataset_joins_by_name(names: List[str], url: bool = False):
    """
    Get the S3 locations of a free dataset join by the dataset names. Find names using list_free_datasets.

    :param names: Dataset names (list of str). A single name given as a plain str is treated as a one-element list.
    :param url: Return a URL or S3 URI? Default is False (S3 URI)
    :return: The public S3 locations of the joins in JSON form for each type: outer, inner, left, and right join
    :raises ValueError: If the API answers with a 4xx status (e.g. the names are not correct).
    :raises Exception: For any other non-200 response.
    """
    if isinstance(names, str):
        # A bare string would otherwise be joined character by character below.
        names = [names]

    func = _get_request_function(
        headers={},
        url="https://api.placekey.io/placekey-py/v1/get-public-join-from-names",
        calls=3,
        period=60,
        max_tries=20
    )
    response = func(params={
        'public_datasets': ",".join(names),
        'url': url
    })
    if response.status_code == 200:
        return json.loads(response.text)
    if 400 <= response.status_code < 500:
        raise ValueError(response.reason)
    raise Exception("Something went wrong. Please contact Placekey.")

def _get_header_int():
"""
Expand Down
30 changes: 30 additions & 0 deletions placekey/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import random
import unittest
import pandas as pd

import pytest

Expand Down Expand Up @@ -121,5 +122,34 @@ def test_lookup_placekeys_slow(self):
self.assertEqual(len(results), num_samples)
self.assertTrue(all(['placekey' in r for r in results]))

def test_pandas_placekey_and_join(self):
    """
    Placekey a DataFrame that mixes address rows with a coordinate-only row, then join the
    result against a copy of the raw data on address_placekey.
    """
    df = pd.DataFrame({
        "address": ["1543 Mission Street, Floor 3", "598 Portola Dr", None],
        "city": ["San Francisco", "San Francisco", None],
        "region": ["CA", "CA", None],
        "postal": ["94105", "94131", None],
        "country": ["US", "US", None],
        "latitude": [None, None, 37.7371],
        "longitude": [None, None, -122.44283]
    })

    column_mappings = {
        "street_address": "address",
        "city": "city",
        "region": "region",
        "postal_code": "postal",
        "iso_country_code": "country",
        "latitude": "latitude",
        "longitude": "longitude"
    }

    # Each extra field is requested once; 'placekey' is expected without being requested.
    df_with_placekeys = self.pk_api._placekey_pandas_df(df, column_mappings, fields=['address_placekey', 'address_confidence_score'])
    # assertIn reports the missing column name on failure, unlike assertTrue(x in y).
    self.assertIn('address_placekey', df_with_placekeys)
    self.assertIn('address_confidence_score', df_with_placekeys)
    self.assertIn('placekey', df_with_placekeys)

    # The first frame already carries the join key, so only the second one needs a lookup.
    # Columns present on both sides come back with pandas' _x / _y suffixes.
    double_join = self.pk_api._join_pandas_df(df_with_placekeys, {}, df.copy(deep=True), column_mappings, on='address_placekey')
    self.assertIn('city_x', double_join)
    self.assertIn('city_y', double_join)



1 change: 1 addition & 0 deletions placekey/tests/test_placekey.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,4 @@ def test_reading_public_dataset_locations(self):
self.assertGreater(len(dataset_list), 0)
for name in dataset_list:
self.assertNotEqual("", pk.return_free_datasets_location_by_name(name))
self.assertNotEqual("", pk.return_free_dataset_joins_by_name([dataset_list[1], dataset_list[2]])['outer'])
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_version():
long_description_content_type="text/markdown",
url="https://github.com/Placekey/placekey-py",
packages=setuptools.find_packages(),
install_requires=['h3>=4.2.1,<5', 'shapely', 'requests', 'ratelimit', 'backoff', 'boto3'],
install_requires=['h3>=4.2.1,<5', 'shapely', 'requests', 'ratelimit', 'backoff', 'boto3', 'pandas'],
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
Expand Down