diff --git a/.jules/bolt.md b/.jules/bolt.md index 5006a2b..da05510 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -47,3 +47,7 @@ ## 2026-01-28 - [Avoid ThreadPoolExecutor Overhead] **Learning:** `ThreadPoolExecutor` context management and thread creation overhead is non-negligible for single-item or very small workloads. If a parallelizable task only has 1 unit of work (e.g., 1 batch), running it synchronously in the main thread is faster and uses less memory than spinning up a pool. **Action:** Check the size of the workload before creating a `ThreadPoolExecutor`. If `len(tasks) == 1`, bypass the executor and run directly. + +## 2024-05-24 - [Skip Validation for Known Data] +**Learning:** Performing expensive validation (e.g., a regex match) on data that is already known to be valid (e.g., it already exists in trusted remote state) is redundant. Checking membership in a local set (O(1) on average) before validating avoids that CPU cost for entries that are already present. +**Action:** In filtering loops, check "is already processed/known" before "is valid", but only when "known" implies "already processed", so that a known item needs no further handling. Be aware of the trade-off: known items that would fail validation are skipped silently and are no longer logged or counted as unsafe. 
diff --git a/main.py b/main.py index fcbea45..4f91e91 100644 --- a/main.py +++ b/main.py @@ -1121,6 +1121,10 @@ def push_rules( skipped_unsafe = 0 for h in unique_hostnames: + # Optimization: Check existence first to skip regex validation for known rules + if h in existing_rules: + continue + if not is_valid_rule(h): log.warning( f"Skipping unsafe rule in {sanitize_for_log(folder_name)}: {sanitize_for_log(h)}" @@ -1128,8 +1132,7 @@ def push_rules( skipped_unsafe += 1 continue - if h not in existing_rules: - filtered_hostnames.append(h) + filtered_hostnames.append(h) if skipped_unsafe > 0: log.warning( diff --git a/tests/test_push_rules_perf.py b/tests/test_push_rules_perf.py index 421368a..63a89c8 100644 --- a/tests/test_push_rules_perf.py +++ b/tests/test_push_rules_perf.py @@ -98,5 +98,31 @@ def test_push_rules_multi_batch(self, mock_executor, mock_as_completed): # This should ALWAYS be True self.assertTrue(mock_executor.called, "ThreadPoolExecutor should be called for multi-batch") + @patch("main.is_valid_rule") + def test_push_rules_skips_validation_for_existing(self, mock_is_valid): + """ + Test that is_valid_rule is NOT called for rules that are already in existing_rules. + """ + mock_is_valid.return_value = True + hostnames = ["h1", "h2"] + # h1 is already known, h2 is new + existing_rules = {"h1"} + + main.push_rules( + self.profile_id, + self.folder_name, + self.folder_id, + self.do, + self.status, + hostnames, + existing_rules, + self.client + ) + + # h1 is in existing_rules, so we should skip validation for it. + # h2 is NOT in existing_rules, so we should validate it. + # So is_valid_rule should be called EXACTLY once, with "h2". + mock_is_valid.assert_called_once_with("h2") + if __name__ == '__main__': unittest.main()