Skip to content

Commit 933dcba

Browse files
committed
Merge remote-tracking branch 'origin/bolt-parallel-dns-validation-9702303872108023141'
# Please enter a commit message to explain why this merge is necessary,
# especially if it merges an updated upstream into a topic branch.
#
# Lines starting with '#' will be ignored, and an empty message aborts
# the commit.
2 parents 892db0f + 7ff113f commit 933dcba

2 files changed

Lines changed: 26 additions & 7 deletions

File tree

.jules/bolt.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,7 @@
3131
## 2024-05-24 - Pass Local State to Avoid Redundant Reads
3232
**Learning:** When a process involves modifying remote state (e.g. deleting folders) and then querying it (e.g. getting rules from remaining folders), maintaining a local replica of the state avoids redundant API calls. If you know what you deleted, you don't need to ask the server "what's left?".
3333
**Action:** Identify sequences of "Read -> Modify -> Read" and optimize to "Read -> Modify (update local) -> Use local".
34+
35+
## 2024-05-24 - Parallelize Validation with Fetching
36+
**Learning:** Running validation sequentially (especially when it involves network I/O such as DNS lookups) before a parallel fetch creates a bottleneck. Combining validation and fetching into a single task submitted to a `ThreadPoolExecutor` lets the validation latency overlap across worker threads instead of blocking the main thread.
37+
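**Action:** Look for patterns like `[url for url in urls if validate(url)]` followed by `ThreadPoolExecutor`. Move the `validate(url)` check inside the executor task, have the task return `None` for URLs that fail validation, and skip those `None` results when collecting the futures.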

main.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -543,17 +543,24 @@ def fetch_folder_data(url: str) -> Dict[str, Any]:
543543

544544
def warm_up_cache(urls: Sequence[str]) -> None:
545545
urls = list(set(urls))
546-
urls_to_fetch = [u for u in urls if u not in _cache and validate_folder_url(u)]
547-
if not urls_to_fetch:
546+
urls_to_process = [u for u in urls if u not in _cache]
547+
if not urls_to_process:
548548
return
549549

550-
total = len(urls_to_fetch)
550+
total = len(urls_to_process)
551551
if not USE_COLORS:
552552
log.info(f"Warming up cache for {total} URLs...")
553553

554+
# OPTIMIZATION: Combine validation (DNS) and fetching (HTTP) in one task
555+
# to allow validation latency to be parallelized.
556+
def _validate_and_fetch(url: str):
557+
if validate_folder_url(url):
558+
return _gh_get(url)
559+
return None
560+
554561
completed = 0
555562
with concurrent.futures.ThreadPoolExecutor() as executor:
556-
futures = {executor.submit(_gh_get, url): url for url in urls_to_fetch}
563+
futures = {executor.submit(_validate_and_fetch, url): url for url in urls_to_process}
557564

558565
if USE_COLORS:
559566
sys.stderr.write(f"\r{Colors.CYAN}⏳ Warming up cache: 0/{total}...{Colors.ENDC}")
@@ -809,15 +816,23 @@ def sync_profile(
809816
try:
810817
# Fetch all folder data first
811818
folder_data_list = []
812-
valid_urls = [url for url in folder_urls if validate_folder_url(url)]
819+
820+
# OPTIMIZATION: Move validation inside the thread pool to parallelize DNS lookups.
821+
# Previously, sequential validation blocked the main thread.
822+
def _fetch_if_valid(url: str):
823+
if validate_folder_url(url):
824+
return fetch_folder_data(url)
825+
return None
813826

814827
with concurrent.futures.ThreadPoolExecutor() as executor:
815-
future_to_url = {executor.submit(fetch_folder_data, url): url for url in valid_urls}
828+
future_to_url = {executor.submit(_fetch_if_valid, url): url for url in folder_urls}
816829

817830
for future in concurrent.futures.as_completed(future_to_url):
818831
url = future_to_url[future]
819832
try:
820-
folder_data_list.append(future.result())
833+
result = future.result()
834+
if result:
835+
folder_data_list.append(result)
821836
except (httpx.HTTPError, KeyError, ValueError) as e:
822837
log.error(f"Failed to fetch folder data from {sanitize_for_log(url)}: {sanitize_for_log(e)}")
823838
continue

0 commit comments

Comments
 (0)