From 9703e3265307b72cc37eb761551e48fcd77b2f69 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 18:12:35 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20replace=20O(4N)=20loop=20wi?=
 =?UTF-8?q?th=20O(N)=20hash=20map=20grouping?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaced four list comprehensions in `generate_processing_report` with a
single dictionary grouping pass to improve time complexity from O(4N) to
O(N).

Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com>
---
 .../pipelines/integrated/process_all_datasets.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/training/ready_packages/pipelines/integrated/process_all_datasets.py b/training/ready_packages/pipelines/integrated/process_all_datasets.py
index b39df81b..ba4b4117 100644
--- a/training/ready_packages/pipelines/integrated/process_all_datasets.py
+++ b/training/ready_packages/pipelines/integrated/process_all_datasets.py
@@ -140,15 +140,17 @@ def process_all_datasets(self) -> Dict[str, Any]:
 
     def generate_processing_report(self) -> Dict[str, Any]:
         """Generate processing report"""
+        # ⚡ Bolt: Replaced O(N*4) multi-pass loops with a single O(N) hash map grouping
+        by_stage = {STAGE1_ID: 0, STAGE2_ID: 0, STAGE3_ID: 0, STAGE4_ID: 0}
+        for d in self.processed_datasets:
+            stage = d.get("stage")
+            if stage in by_stage:
+                by_stage[stage] += 1
+
         return {
             "timestamp": datetime.now().isoformat(),
             "processed_datasets": len(self.processed_datasets),
-            "by_stage": {
-                STAGE1_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE1_ID),
-                STAGE2_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE2_ID),
-                STAGE3_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE3_ID),
-                STAGE4_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE4_ID),
-            },
+            "by_stage": by_stage,
             "datasets": self.processed_datasets,
         }
 