-
Notifications
You must be signed in to change notification settings - Fork 1
β‘ Bolt: Parallelize DNS validation in sync_profile and warm_up_cache #132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -469,17 +469,24 @@ def fetch_folder_data(url: str) -> Dict[str, Any]: | |
|
|
||
| def warm_up_cache(urls: Sequence[str]) -> None: | ||
| urls = list(set(urls)) | ||
| urls_to_fetch = [u for u in urls if u not in _cache and validate_folder_url(u)] | ||
| if not urls_to_fetch: | ||
| urls_to_process = [u for u in urls if u not in _cache] | ||
| if not urls_to_process: | ||
| return | ||
|
|
||
| total = len(urls_to_fetch) | ||
| total = len(urls_to_process) | ||
| if not USE_COLORS: | ||
| log.info(f"Warming up cache for {total} URLs...") | ||
|
|
||
| # OPTIMIZATION: Combine validation (DNS) and fetching (HTTP) in one task | ||
| # to allow validation latency to be parallelized. | ||
| def _validate_and_fetch(url: str): | ||
| if validate_folder_url(url): | ||
| return _gh_get(url) | ||
| return None | ||
|
Comment on lines
+482
to
+485
|
||
|
|
||
| completed = 0 | ||
| with concurrent.futures.ThreadPoolExecutor() as executor: | ||
| futures = {executor.submit(_gh_get, url): url for url in urls_to_fetch} | ||
| futures = {executor.submit(_validate_and_fetch, url): url for url in urls_to_process} | ||
|
|
||
| if USE_COLORS: | ||
| sys.stderr.write(f"\r{Colors.CYAN}β³ Warming up cache: 0/{total}...{Colors.ENDC}") | ||
|
|
@@ -735,15 +742,23 @@ def sync_profile( | |
| try: | ||
| # Fetch all folder data first | ||
| folder_data_list = [] | ||
| valid_urls = [url for url in folder_urls if validate_folder_url(url)] | ||
|
|
||
| # OPTIMIZATION: Move validation inside the thread pool to parallelize DNS lookups. | ||
| # Previously, sequential validation blocked the main thread. | ||
| def _fetch_if_valid(url: str): | ||
| if validate_folder_url(url): | ||
| return fetch_folder_data(url) | ||
| return None | ||
|
Comment on lines
+748
to
+751
|
||
|
|
||
| with concurrent.futures.ThreadPoolExecutor() as executor: | ||
| future_to_url = {executor.submit(fetch_folder_data, url): url for url in valid_urls} | ||
| future_to_url = {executor.submit(_fetch_if_valid, url): url for url in folder_urls} | ||
|
|
||
| for future in concurrent.futures.as_completed(future_to_url): | ||
| url = future_to_url[future] | ||
| try: | ||
| folder_data_list.append(future.result()) | ||
| result = future.result() | ||
| if result: | ||
| folder_data_list.append(result) | ||
| except (httpx.HTTPError, KeyError, ValueError) as e: | ||
| log.error(f"Failed to fetch folder data from {sanitize_for_log(url)}: {sanitize_for_log(e)}") | ||
| continue | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This optimization contradicts the learning documented in
.jules/bolt.mdlines 15-17 about thread safety. The journal explicitly states "When parallelizing IO operations that update a shared collection (like a set of existing rules), always use athreading.Lockfor the write operations." The_cachedictionary is a shared collection being updated from multiple threads without synchronization, which violates this established principle.