From 3483d4f936938a00772db1d390a5d4c1d2574c5c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 12 Feb 2026 15:16:28 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Avoid=20thread=20pool?= =?UTF-8?q?=20overhead=20for=20small=20rule=20updates?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 💡 What: Modified `push_rules` to bypass `ThreadPoolExecutor` creation when `len(batches) == 1`. Instead of spinning up a thread pool for a single task, the batch is processed synchronously in the main thread. 🎯 Why: Creating a thread pool, submitting a future, and waiting for completion incurs overhead (context switching, thread management). For small updates (which result in a single batch of < 500 rules), this overhead is unnecessary and dominates the execution time compared to the actual API call. Running synchronously avoids this overhead entirely. 📊 Impact: - Eliminates thread creation latency for small folders. - Reduces CPU usage slightly by avoiding thread management. - Makes small updates faster and more efficient. 🔬 Measurement: Run `uv run python -m unittest tests/test_push_rules_perf.py`. This test verifies that `ThreadPoolExecutor` is NOT initialized for single-batch payloads, but IS initialized for multi-batch payloads, ensuring the optimization works as intended without breaking larger updates. Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- .jules/bolt.md | 4 ++ main.py | 45 +++++++++------ tests/test_push_rules_perf.py | 102 ++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 16 deletions(-) create mode 100644 tests/test_push_rules_perf.py diff --git a/.jules/bolt.md b/.jules/bolt.md index 61a542c..5006a2b 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -43,3 +43,7 @@ ## 2024-05-22 - Ordered Deduplication Optimization **Learning:** `dict.fromkeys(list)` is significantly faster (~2x) than a Python loop with `seen = set()` for deduplicating large lists while preserving order. It also naturally deduplicates invalid items if validation happens after, which prevents log spam. **Action:** Use `dict.fromkeys()` for ordered deduplication of large inputs instead of manual loop with `seen` set. + +## 2026-01-28 - [Avoid ThreadPoolExecutor Overhead] +**Learning:** `ThreadPoolExecutor` context management and thread creation overhead is non-negligible for single-item or very small workloads. If a parallelizable task only has 1 unit of work (e.g., 1 batch), running it synchronously in the main thread is faster and uses less memory than spinning up a pool. +**Action:** Check the size of the workload before creating a `ThreadPoolExecutor`. If `len(tasks) == 1`, bypass the executor and run directly. diff --git a/main.py b/main.py index bcf4a7f..8a66e0e 100644 --- a/main.py +++ b/main.py @@ -1172,23 +1172,36 @@ def process_batch(batch_idx: int, batch_data: List[str]) -> Optional[List[str]]: # Optimization 3: Parallelize batch processing # Using 3 workers to speed up writes without hitting aggressive rate limits. - with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: - futures = { - executor.submit(process_batch, i, batch): i - for i, batch in enumerate(batches, 1) - } + # If only 1 batch, run it synchronously to avoid ThreadPoolExecutor overhead. + if total_batches == 1: + result = process_batch(1, batches[0]) + if result: + successful_batches += 1 + existing_rules.update(result) + + render_progress_bar( + successful_batches, + total_batches, + f"Folder {sanitize_for_log(folder_name)}", + ) + else: + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + futures = { + executor.submit(process_batch, i, batch): i + for i, batch in enumerate(batches, 1) + } - for future in concurrent.futures.as_completed(futures): - result = future.result() - if result: - successful_batches += 1 - existing_rules.update(result) - - render_progress_bar( - successful_batches, - total_batches, - f"Folder {sanitize_for_log(folder_name)}", - ) + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result: + successful_batches += 1 + existing_rules.update(result) + + render_progress_bar( + successful_batches, + total_batches, + f"Folder {sanitize_for_log(folder_name)}", + ) if successful_batches == total_batches: if USE_COLORS: diff --git a/tests/test_push_rules_perf.py b/tests/test_push_rules_perf.py new file mode 100644 index 0000000..421368a --- /dev/null +++ b/tests/test_push_rules_perf.py @@ -0,0 +1,102 @@ + +import unittest +from unittest.mock import MagicMock, patch, ANY +import sys +import os + +# Add root to path to import main +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import main + +class TestPushRulesPerf(unittest.TestCase): + def setUp(self): + self.client = MagicMock() + self.profile_id = "test_profile" + self.folder_name = "test_folder" + self.folder_id = "test_folder_id" + self.do = 1 + self.status = 1 + self.existing_rules = set() + + @patch("main.concurrent.futures.as_completed") + @patch("main.concurrent.futures.ThreadPoolExecutor") + def test_push_rules_single_batch_optimization(self, mock_executor, mock_as_completed): + """ + Test that push_rules avoids ThreadPoolExecutor for single batch (< 500 rules). + """ + # Create < 500 rules (1 batch) + hostnames = [f"example{i}.com" for i in range(100)] + + # Mock executor context manager + mock_executor_instance = mock_executor.return_value + mock_executor_instance.__enter__.return_value = mock_executor_instance + mock_executor_instance.__exit__.return_value = None + + # Mock future + mock_future = MagicMock() + mock_future.result.return_value = hostnames # Success + mock_executor_instance.submit.return_value = mock_future + + # Mock as_completed to yield the future immediately + mock_as_completed.return_value = [mock_future] + + # Since we are bypassing TPE, we might need to mock API call? + # The code will call process_batch(1, batch). + # process_batch calls _api_post_form. + # client is mocked, so _api_post_form works (retries mocked). + # But we need to ensure process_batch works correctly in isolation. + + # For this test, we mock _api_post_form? + # No, _api_post_form calls client.post. + + main.push_rules( + self.profile_id, + self.folder_name, + self.folder_id, + self.do, + self.status, + hostnames, + self.existing_rules, + self.client + ) + + # Verify if Executor was called. + # AFTER OPTIMIZATION: This should be False. + self.assertFalse(mock_executor.called, "ThreadPoolExecutor should NOT be called for single batch") + + @patch("main.concurrent.futures.as_completed") + @patch("main.concurrent.futures.ThreadPoolExecutor") + def test_push_rules_multi_batch(self, mock_executor, mock_as_completed): + """ + Test that push_rules uses ThreadPoolExecutor for multiple batches (> 500 rules). + """ + # Create > 500 rules (2 batches) + hostnames = [f"example{i}.com" for i in range(600)] + + mock_executor_instance = mock_executor.return_value + mock_executor_instance.__enter__.return_value = mock_executor_instance + + # Mock submit to return a Future + mock_future = MagicMock() + mock_future.result.return_value = ["some_rule"] + mock_executor_instance.submit.return_value = mock_future + + mock_as_completed.return_value = [mock_future, mock_future] # 2 batches + + main.push_rules( + self.profile_id, + self.folder_name, + self.folder_id, + self.do, + self.status, + hostnames, + self.existing_rules, + self.client + ) + + # This should ALWAYS be True + self.assertTrue(mock_executor.called, "ThreadPoolExecutor should be called for multi-batch") + +if __name__ == '__main__': + unittest.main() From 23d6d9403c66be5c3c9d18834399c59a157efdd4 Mon Sep 17 00:00:00 2001 From: Abhi Mehrotra Date: Thu, 12 Feb 2026 20:52:33 -0600 Subject: [PATCH 2/2] test: avoid stale main module reference in plan details tests Co-Authored-By: Warp --- tests/test_plan_details.py | 45 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/tests/test_plan_details.py b/tests/test_plan_details.py index e7fff27..a7fe865 100644 --- a/tests/test_plan_details.py +++ b/tests/test_plan_details.py @@ -5,12 +5,16 @@ import sys from unittest.mock import patch -import main +# NOTE: Avoid importing `main` at module import time. +# Some tests delete `sys.modules["main"]` to force a clean import under different env/TTY +# settings; holding a stale module reference can cause patches to target the wrong module. def test_print_plan_details_no_colors(capsys): """Test print_plan_details output when colors are disabled.""" - with patch("main.USE_COLORS", False): + import main as m + + with patch.object(m, "USE_COLORS", False): plan_entry = { "profile": "test_profile", "folders": [ @@ -18,7 +22,7 @@ def test_print_plan_details_no_colors(capsys): {"name": "Folder A", "rules": 10}, ], } - main.print_plan_details(plan_entry) + m.print_plan_details(plan_entry) captured = capsys.readouterr() output = captured.out @@ -32,9 +36,11 @@ def test_print_plan_details_no_colors(capsys): def test_print_plan_details_empty_folders(capsys): """Test print_plan_details with no folders.""" - with patch("main.USE_COLORS", False): + import main as m + + with patch.object(m, "USE_COLORS", False): plan_entry = {"profile": "test_profile", "folders": []} - main.print_plan_details(plan_entry) + m.print_plan_details(plan_entry) captured = capsys.readouterr() output = captured.out @@ -45,27 +51,22 @@ def test_print_plan_details_empty_folders(capsys): def test_print_plan_details_with_colors(capsys): """Test print_plan_details output when colors are enabled.""" - # Force USE_COLORS=True for this test, but also ensure Colors class is populated - # The Colors class is defined at import time based on USE_COLORS. - # If main was imported previously with USE_COLORS=False, Colors attributes are empty strings. - # We must reload main with an environment that forces USE_COLORS=True, or mock Colors. - - with patch.dict(os.environ, {"NO_COLOR": ""}): - with patch("sys.stderr.isatty", return_value=True), patch("sys.stdout.isatty", return_value=True): - # Robust reload: handle case where main module reference is stale - if "main" in sys.modules: - importlib.reload(sys.modules["main"]) - else: - import main - importlib.reload(main) - - # Now verify output with colors + # Force USE_COLORS=True for this test, and reload `main` so the `Colors` class is + # created with non-empty ANSI codes. + + with patch.dict(os.environ, {"NO_COLOR": ""}, clear=False): + with patch("sys.stderr.isatty", return_value=True), patch( + "sys.stdout.isatty", return_value=True + ): + import main as m + + m = importlib.reload(m) + plan_entry = { "profile": "test_profile", "folders": [{"name": "Folder A", "rules": 10}], } - # Use the module from sys.modules to ensure we use the reloaded one - sys.modules["main"].print_plan_details(plan_entry) + m.print_plan_details(plan_entry) captured = capsys.readouterr() output = captured.out