From 657bbd4f74691271a997f1146d764ecb506e2f38 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 10 Feb 2026 11:52:27 +0100 Subject: [PATCH 1/5] feat: limit max number of items in collection to scan --- posthog/exception_utils.py | 8 ++- posthog/test/test_exception_capture.py | 78 ++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 4cec07e4..8ba96431 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -66,6 +66,7 @@ CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$" _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000 +_MAX_COLLECTION_ITEMS_TO_SCAN = 100 _REGEX_METACHARACTERS = frozenset(r"\.^$*+?{}[]|()") DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 @@ -992,7 +993,9 @@ def _mask_sensitive_data(value, compiled_mask, _seen=None): if isinstance(value, dict): result = {} - for k, v in value.items(): + for i, (k, v) in enumerate(value.items()): + if i >= _MAX_COLLECTION_ITEMS_TO_SCAN: + break key_str = str(k) if not isinstance(k, str) else k if len(key_str) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: result[k] = CODE_VARIABLES_TOO_LONG_VALUE @@ -1002,8 +1005,9 @@ def _mask_sensitive_data(value, compiled_mask, _seen=None): result[k] = _mask_sensitive_data(v, compiled_mask, _seen) return result elif isinstance(value, (list, tuple)): + items_to_scan = value[:_MAX_COLLECTION_ITEMS_TO_SCAN] masked_items = [ - _mask_sensitive_data(item, compiled_mask, _seen) for item in value + _mask_sensitive_data(item, compiled_mask, _seen) for item in items_to_scan ] return type(value)(masked_items) elif isinstance(value, str): diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index a90a000e..ff798c76 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -639,3 +639,81 @@ def test_compile_patterns_fast_path_and_regex_fallback(): # No match assert _pattern_matches("safe_var", mixed) is False + + +def test_mask_sensitive_data_large_dict_truncated(): + from posthog.exception_utils import ( + _MAX_COLLECTION_ITEMS_TO_SCAN, + _compile_patterns, + _mask_sensitive_data, + ) + + compiled_mask = _compile_patterns([r"(?i)password"]) + + large_dict = {f"key_{i}": f"value_{i}" for i in range(300)} + + result = _mask_sensitive_data(large_dict, compiled_mask) + + assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN + + for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): + assert result[f"key_{i}"] == f"value_{i}" + + +def test_mask_sensitive_data_large_list_truncated(): + from posthog.exception_utils import ( + _MAX_COLLECTION_ITEMS_TO_SCAN, + _compile_patterns, + _mask_sensitive_data, + ) + + compiled_mask = _compile_patterns([r"(?i)password"]) + + large_list = [f"item_{i}" for i in range(300)] + + result = _mask_sensitive_data(large_list, compiled_mask) + + assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN + + for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): + assert result[i] == f"item_{i}" + + +def test_mask_sensitive_data_large_tuple_truncated(): + from posthog.exception_utils import ( + _MAX_COLLECTION_ITEMS_TO_SCAN, + _compile_patterns, + _mask_sensitive_data, + ) + + compiled_mask = _compile_patterns([r"(?i)password"]) + + large_tuple = tuple(f"item_{i}" for i in range(300)) + + result = _mask_sensitive_data(large_tuple, compiled_mask) + + assert isinstance(result, tuple) + assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN + for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): + assert result[i] == f"item_{i}" + + +def test_mask_sensitive_data_small_collections_unaffected(): + from posthog.exception_utils import _compile_patterns, _mask_sensitive_data + + compiled_mask = _compile_patterns([r"(?i)password"]) + + # Small dict - all items preserved + small_dict = {f"key_{i}": f"value_{i}" for i in range(10)} + result = _mask_sensitive_data(small_dict, compiled_mask) + assert len(result) == 10 + + # Small list - all items preserved + small_list = [f"item_{i}" for i in range(10)] + result = _mask_sensitive_data(small_list, compiled_mask) + assert len(result) == 10 + + # Small tuple - all items preserved + small_tuple = tuple(f"item_{i}" for i in range(10)) + result = _mask_sensitive_data(small_tuple, compiled_mask) + assert len(result) == 10 \ No newline at end of file From 4c32b783cae274417125be1fbe107dd15673caca Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 10 Feb 2026 11:56:09 +0100 Subject: [PATCH 2/5] feat: changelog --- CHANGELOG.md | 4 ++++ posthog/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ba3d34b..46011a60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 7.8.6 - 2026-02-09 + +fix: limit collections scanning in code variables + # 7.8.5 - 2026-02-09 fix: further optimize code variables pattern matching diff --git a/posthog/version.py b/posthog/version.py index 9581625a..08a3bb39 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.8.5" +VERSION = "7.8.6" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From a003d5b82d52986aed70b8cd0a2f30c44dd272c7 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 10 Feb 2026 11:59:48 +0100 Subject: [PATCH 3/5] fix: format --- posthog/test/test_exception_capture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index ff798c76..e6822c8a 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -716,4 +716,4 @@ def test_mask_sensitive_data_small_collections_unaffected(): # Small tuple - all items preserved small_tuple = tuple(f"item_{i}" for i in range(10)) result = _mask_sensitive_data(small_tuple, compiled_mask) - assert len(result) == 10 \ No newline at end of file + assert len(result) == 10 From 4d741c8d2a5ca93b0f1e636c96085718f6eb54a4 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Tue, 10 Feb 2026 12:09:57 +0100 Subject: [PATCH 4/5] feat: test --- posthog/test/test_exception_capture.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index e6822c8a..9ee5b5ec 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -696,24 +696,3 @@ def test_mask_sensitive_data_large_tuple_truncated(): assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): assert result[i] == f"item_{i}" - - -def test_mask_sensitive_data_small_collections_unaffected(): - from posthog.exception_utils import _compile_patterns, _mask_sensitive_data - - compiled_mask = _compile_patterns([r"(?i)password"]) - - # Small dict - all items preserved - small_dict = {f"key_{i}": f"value_{i}" for i in range(10)} - result = _mask_sensitive_data(small_dict, compiled_mask) - assert len(result) == 10 - - # Small list - all items preserved - small_list = [f"item_{i}" for i in range(10)] - result = _mask_sensitive_data(small_list, compiled_mask) - assert len(result) == 10 - - # Small tuple - all items preserved - small_tuple = tuple(f"item_{i}" for i in range(10)) - result = _mask_sensitive_data(small_tuple, compiled_mask) - assert len(result) == 10 From 5bcd2435beb5a8b60be771ad8f4ff11ae90a9124 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Wed, 11 Feb 2026 13:13:25 +0100 Subject: [PATCH 5/5] feat: replace entire collection instead of truncating --- posthog/exception_utils.py | 11 ++++++----- posthog/test/test_exception_capture.py | 27 +++++++++----------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 8ba96431..4cdee77d 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -992,10 +992,10 @@ def _mask_sensitive_data(value, compiled_mask, _seen=None): _seen.add(obj_id) if isinstance(value, dict): + if len(value) > _MAX_COLLECTION_ITEMS_TO_SCAN: + return CODE_VARIABLES_TOO_LONG_VALUE result = {} - for i, (k, v) in enumerate(value.items()): - if i >= _MAX_COLLECTION_ITEMS_TO_SCAN: - break + for k, v in value.items(): key_str = str(k) if not isinstance(k, str) else k if len(key_str) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: result[k] = CODE_VARIABLES_TOO_LONG_VALUE @@ -1005,9 +1005,10 @@ def _mask_sensitive_data(value, compiled_mask, _seen=None): result[k] = _mask_sensitive_data(v, compiled_mask, _seen) return result elif isinstance(value, (list, tuple)): - items_to_scan = value[:_MAX_COLLECTION_ITEMS_TO_SCAN] + if len(value) > _MAX_COLLECTION_ITEMS_TO_SCAN: + return CODE_VARIABLES_TOO_LONG_VALUE masked_items = [ - _mask_sensitive_data(item, compiled_mask, _seen) for item in items_to_scan + _mask_sensitive_data(item, compiled_mask, _seen) for item in value ] return type(value)(masked_items) elif isinstance(value, str): diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index 9ee5b5ec..3d2f8d08 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -641,9 +641,9 @@ def test_compile_patterns_fast_path_and_regex_fallback(): assert _pattern_matches("safe_var", mixed) is False -def test_mask_sensitive_data_large_dict_truncated(): +def test_mask_sensitive_data_large_dict_replaced(): from posthog.exception_utils import ( - _MAX_COLLECTION_ITEMS_TO_SCAN, + CODE_VARIABLES_TOO_LONG_VALUE, _compile_patterns, _mask_sensitive_data, ) @@ -654,15 +654,12 @@ def test_mask_sensitive_data_large_dict_truncated(): result = _mask_sensitive_data(large_dict, compiled_mask) - assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN - - for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): - assert result[f"key_{i}"] == f"value_{i}" + assert result == CODE_VARIABLES_TOO_LONG_VALUE -def test_mask_sensitive_data_large_list_truncated(): +def test_mask_sensitive_data_large_list_replaced(): from posthog.exception_utils import ( - _MAX_COLLECTION_ITEMS_TO_SCAN, + CODE_VARIABLES_TOO_LONG_VALUE, _compile_patterns, _mask_sensitive_data, ) @@ -673,15 +670,12 @@ def test_mask_sensitive_data_large_list_truncated(): result = _mask_sensitive_data(large_list, compiled_mask) - assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN + assert result == CODE_VARIABLES_TOO_LONG_VALUE - for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): - assert result[i] == f"item_{i}" - -def test_mask_sensitive_data_large_tuple_truncated(): +def test_mask_sensitive_data_large_tuple_replaced(): from posthog.exception_utils import ( - _MAX_COLLECTION_ITEMS_TO_SCAN, + CODE_VARIABLES_TOO_LONG_VALUE, _compile_patterns, _mask_sensitive_data, ) @@ -692,7 +686,4 @@ def test_mask_sensitive_data_large_tuple_truncated(): result = _mask_sensitive_data(large_tuple, compiled_mask) - assert isinstance(result, tuple) - assert len(result) == _MAX_COLLECTION_ITEMS_TO_SCAN - for i in range(_MAX_COLLECTION_ITEMS_TO_SCAN): - assert result[i] == f"item_{i}" + assert result == CODE_VARIABLES_TOO_LONG_VALUE