Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 65 additions & 3 deletions tests/unit/test_handlers_build_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
"""

from datetime import datetime, timezone
from typing import Union

import numpy as np
import pytest

import ttd_databricks_python.ttd_databricks.handlers.advertiser as adv_handler
from ttd_databricks_python.ttd_databricks.id_types import normalize_id_type
Expand All @@ -20,6 +24,13 @@
_UnsetType = type(UNSET)


# An array<struct> column reaches build_items as a list via the adhoc path
# (collect + asDict) and as a numpy array via the batch path (mapInPandas).
# build_items must handle both, so array-column tests run against each shape.
def _build_array_column(array_type: type, items: list[dict]) -> Union[list, np.ndarray]:
return items if array_type is list else np.array(items, dtype=object)


# --------------------------------------------------------------------------- #
# Advertiser handler #
# --------------------------------------------------------------------------- #
Expand Down Expand Up @@ -119,10 +130,13 @@ def test_builds_offline_conversion_data_item_with_correct_fields(self):
assert isinstance(item.timestamp_utc, datetime)
assert isinstance(item.user_id_array, _UnsetType)

def test_user_ids_converted_to_user_id_array_with_type_codes(self):
@pytest.mark.parametrize("array_type", [list, np.ndarray])
def test_user_ids_converted_to_user_id_array_with_type_codes(self, array_type):
row = {
**self._MINIMAL,
"user_ids": [{"type": "TDID", "id": "test-tdid-value"}, {"type": "DAID", "id": "test-daid-value"}],
"user_ids": _build_array_column(
array_type, [{"type": "TDID", "id": "test-tdid-value"}, {"type": "DAID", "id": "test-daid-value"}]
),
}
item = oc_handler.build_items([row])[0]
assert item.user_id_array == [["0", "test-tdid-value"], ["1", "test-daid-value"]]
Expand All @@ -147,4 +161,52 @@ def test_optional_fields_are_passed_through_when_provided(self):
item = oc_handler.build_items([row])[0]
assert item.order_id == "test-order-id"
assert item.value == "99.99"
assert item.country == "US"
assert item.country == "US"

@pytest.mark.parametrize("array_type", [list, np.ndarray])
def test_multi_element_line_items(self, array_type):
    """Every line item is converted, in input order, for list and ndarray columns."""
    raw_line_items = [
        {"item_code": "sku1", "name": "first", "qty": "1", "price": "9.99", "cat": "books"},
        {"item_code": "sku2", "name": "second", "qty": "2", "price": "5.00", "cat": "toys"},
    ]
    row = {
        **self._MINIMAL,
        "line_items": _build_array_column(array_type, raw_line_items),
    }
    item = oc_handler.build_items([row])[0]
    # Compare the whole sequence rather than only its length and first entry,
    # so a dropped, duplicated, or reordered element fails the test.
    assert [line_item.item_code for line_item in item.line_items] == ["sku1", "sku2"]

@pytest.mark.parametrize("array_type", [list, np.ndarray])
def test_multi_element_privacy_settings(self, array_type):
    """Every privacy setting is converted, in input order, for list and ndarray columns."""
    raw_privacy_settings = [
        {"privacy_type": "GDPR", "is_applicable": "true", "consent_string": "abc"},
        {"privacy_type": "CCPA", "is_applicable": "false", "consent_string": "xyz"},
    ]
    row = {
        **self._MINIMAL,
        "privacy_settings": _build_array_column(array_type, raw_privacy_settings),
    }
    item = oc_handler.build_items([row])[0]
    # Compare the whole sequence rather than only its length and first entry,
    # so a dropped, duplicated, or reordered element fails the test.
    assert [setting.privacy_type for setting in item.privacy_settings] == ["GDPR", "CCPA"]

@pytest.mark.parametrize("array_type", [list, np.ndarray])
def test_collect_raw_pii_ids_keeps_only_pii_types(self, array_type):
    """Of an Email entry and a TDID entry, only the Email id is collected."""
    mixed_user_ids = [
        {"type": "Email", "id": "a@example.com"},
        {"type": "TDID", "id": "device-1"},
    ]
    single_row = {
        **self._MINIMAL,
        "user_ids": _build_array_column(array_type, mixed_user_ids),
    }
    collected = oc_handler.collect_raw_pii_ids_per_row([single_row])
    assert collected == [["a@example.com"]]

def test_collect_raw_pii_ids_handles_missing_user_ids(self):
    """A row made of ``_MINIMAL`` alone produces one empty id list."""
    # NOTE(review): presumes _MINIMAL carries no "user_ids" entry — confirm against the class fixture.
    rows_without_user_ids = [self._MINIMAL]
    collected = oc_handler.collect_raw_pii_ids_per_row(rows_without_user_ids)
    assert collected == [[]]
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[OfflineConversionDataI
}

raw_user_ids = row.get("user_ids")
if raw_user_ids:
if raw_user_ids is not None and len(raw_user_ids) > 0:
kwargs["user_id_array"] = [[_user_id_type(user_id["type"]), user_id["id"]] for user_id in raw_user_ids]

for field in ITEM_OPTIONAL_FIELDS:
Expand All @@ -75,7 +75,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[OfflineConversionDataI
kwargs[field] = value

raw_line_items = row.get("line_items")
if raw_line_items:
if raw_line_items is not None and len(raw_line_items) > 0:
kwargs["line_items"] = [
RealTimeConversionEventLineItem(
**{k: v for k, v in (li if isinstance(li, dict) else li.asDict()).items() if v is not None}
Expand All @@ -84,7 +84,7 @@ def build_items(items_data: list[dict[str, Any]]) -> list[OfflineConversionDataI
]

raw_privacy_settings = row.get("privacy_settings")
if raw_privacy_settings:
if raw_privacy_settings is not None and len(raw_privacy_settings) > 0:
kwargs["privacy_settings"] = [
RealTimeConversionEventsPrivacySetting(
**{k: v for k, v in (ps if isinstance(ps, dict) else ps.asDict()).items() if v is not None}
Expand All @@ -103,7 +103,9 @@ def collect_raw_pii_ids_per_row(items_data: list[dict[str, Any]]) -> list[list[s
"""
out: list[list[str]] = []
for row in items_data:
raw_user_ids = row.get("user_ids") or []
raw_user_ids = row.get("user_ids")
if raw_user_ids is None:
raw_user_ids = []
out.append(
[entry["id"] for entry in raw_user_ids if entry["type"] and entry["type"].upper() in RAW_PII_ID_TYPES]
)
Expand Down
Loading