From 47c3d6060221b355f2200ba0cd8db1a6b7b80bc6 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 15 Feb 2026 14:43:34 +0000
Subject: [PATCH 1/2] perf: optimize file download with larger chunk size

- Use `chunk_size=16384` (16KB) in `httpx` stream to reduce loop overhead
  and list appends for large blocklists.
- Add journal entry in `.jules/bolt.md`.

Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com>
---
 .jules/bolt.md | 4 ++++
 main.py        | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.jules/bolt.md b/.jules/bolt.md
index da05510..d648ac6 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -51,3 +51,7 @@
 ## 2024-05-24 - [Skip Validation for Known Data]
 **Learning:** Performing expensive validation (e.g. regex) on data that is already known to be valid (e.g. exists in trusted remote state) is redundant. Checking existence in a local set (O(1)) before validation avoids CPU overhead for duplicates.
 **Action:** In filtering loops, check "is already processed/known" before "is valid", especially if "valid" implies "safe to process" and "known" implies "already processed".
+
+## 2026-02-04 - [Optimize Buffer for Large Downloads]
+**Learning:** When downloading large files (e.g., blocklists), the default chunk size of HTTP libraries might be small, leading to excessive loop iterations and list operations. Increasing the buffer size (e.g., to 16KB) reduces CPU overhead during I/O-bound operations.
+**Action:** When using `iter_bytes()` or similar streaming methods for large resources, explicitly set a larger `chunk_size` (e.g., 16384) to improve throughput and reduce CPU usage.
diff --git a/main.py b/main.py
index e1e3ced..57336da 100644
--- a/main.py
+++ b/main.py
@@ -950,7 +950,8 @@ def _gh_get(url: str) -> Dict:
         # 2. Stream and check actual size
         chunks = []
         current_size = 0
-        for chunk in r.iter_bytes():
+        # Optimization: Use 16KB chunks to reduce loop overhead/appends for large files
+        for chunk in r.iter_bytes(chunk_size=16384):
             current_size += len(chunk)
             if current_size > MAX_RESPONSE_SIZE:
                 raise ValueError(

From 0819e8247ca94f8cb277ef51262dca9e2435a198 Mon Sep 17 00:00:00 2001
From: Abhi Mehrotra <84992105+abhimehro@users.noreply.github.com>
Date: Sun, 15 Feb 2026 20:12:46 -0600
Subject: [PATCH 2/2] Update main.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 57336da..bfe4a87 100644
--- a/main.py
+++ b/main.py
@@ -951,7 +951,7 @@ def _gh_get(url: str) -> Dict:
         chunks = []
         current_size = 0
         # Optimization: Use 16KB chunks to reduce loop overhead/appends for large files
-        for chunk in r.iter_bytes(chunk_size=16384):
+        for chunk in r.iter_bytes(chunk_size=16 * 1024):
             current_size += len(chunk)
             if current_size > MAX_RESPONSE_SIZE:
                 raise ValueError(