From 9703e3265307b72cc37eb761551e48fcd77b2f69 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 18:12:35 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20replace=20O(4N)=20loop=20wi?=
 =?UTF-8?q?th=20O(N)=20hash=20map=20grouping?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaced four list comprehensions in `generate_processing_report` with a
single dictionary grouping pass to improve time complexity from O(4N) to
O(N).

Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com>
---
 .../pipelines/integrated/process_all_datasets.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/training/ready_packages/pipelines/integrated/process_all_datasets.py b/training/ready_packages/pipelines/integrated/process_all_datasets.py
index b39df81b..ba4b4117 100644
--- a/training/ready_packages/pipelines/integrated/process_all_datasets.py
+++ b/training/ready_packages/pipelines/integrated/process_all_datasets.py
@@ -140,15 +140,17 @@ def process_all_datasets(self) -> Dict[str, Any]:
 
     def generate_processing_report(self) -> Dict[str, Any]:
         """Generate processing report"""
+        # ⚡ Bolt: Replaced O(N*4) multi-pass loops with a single O(N) hash map grouping
+        by_stage = {STAGE1_ID: 0, STAGE2_ID: 0, STAGE3_ID: 0, STAGE4_ID: 0}
+        for d in self.processed_datasets:
+            stage = d.get("stage")
+            if stage in by_stage:
+                by_stage[stage] += 1
+
         return {
             "timestamp": datetime.now().isoformat(),
             "processed_datasets": len(self.processed_datasets),
-            "by_stage": {
-                STAGE1_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE1_ID),
-                STAGE2_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE2_ID),
-                STAGE3_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE3_ID),
-                STAGE4_ID: sum(1 for d in self.processed_datasets if d.get("stage") == STAGE4_ID),
-            },
+            "by_stage": by_stage,
             "datasets": self.processed_datasets,
         }
 