RohanExploit · RohanExploit · May 13, 2026 · coderabbitai · May 13, 2026 · coderabbitai
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -89,3 +89,7 @@
 ## 2026-05-18 - Jaccard Similarity Optimization via Set Arithmetic
 **Learning:** In retrieval loops calculating Jaccard similarity (e.g. RAG), explicitly building a union set `A.union(B)` is expensive due to memory allocation and population.
 **Action:** Use the inclusion-exclusion principle $|A \cup B| = |A| + |B| - |A \cap B|$ to calculate union size in O(1) arithmetic time after calculating the intersection. Pre-calculate $|B|$ (token count) to further reduce overhead. Use `isdisjoint()` for fast early-exit.
+
+## 2025-02-18 - SQLAlchemy Multiple Aggregations vs Standard GROUP BY
+**Learning:** When calculating aggregates over categorical columns (e.g., status counts, confirmation types) in SQLAlchemy, using standard `GROUP BY` queries (e.g., `db.query(Model.type, func.count(Model.id)).group_by(Model.type)`) is measurably faster and scales better than stringing together multiple `func.sum(case(...))` statements, reducing database parse time and scan overhead.
+**Action:** When auditing ORM queries for performance, replace multiple conditional aggregations (`func.sum(case)`) with standard `GROUP BY` patterns combined with Python-side extraction where categories are cleanly divisible.
diff --git a/backend/closure_service.py b/backend/closure_service.py
@@ -139,14 +139,20 @@ def check_and_finalize_closure(grievance_id: int, db: Session) -> dict:
         ).scalar()
 
         # Get all confirmation counts in a single query instead of multiple round-trips
-        from sqlalchemy import case
+        # Optimized: Standard GROUP BY is measurably faster than multiple func.sum(case(...)) aggregations
         stats = db.query(
-            func.sum(case((ClosureConfirmation.confirmation_type == 'confirmed', 1), else_=0)).label('confirmed'),
-            func.sum(case((ClosureConfirmation.confirmation_type == 'disputed', 1), else_=0)).label('disputed')
-        ).filter(ClosureConfirmation.grievance_id == grievance_id).first()
+            ClosureConfirmation.confirmation_type,
+            func.count(ClosureConfirmation.id)
+        ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all()
 
-        confirmations_count = stats.confirmed or 0
-        disputes_count = stats.disputed or 0
+        confirmations_count = 0
+        disputes_count = 0
+
+        for ctype, count in stats:
+            if ctype == 'confirmed':
+                confirmations_count = count
+            elif ctype == 'disputed':
+                disputes_count = count
-        for ctype, count in stats:
-            if ctype == 'confirmed':
-                confirmations_count = count
-            elif ctype == 'disputed':
-                disputes_count = count
+        for ctype, count in stats:
+            ctype_val = ctype.value if hasattr(ctype, 'value') else ctype
+            if ctype_val == 'confirmed':
+                confirmations_count = count
+            elif ctype_val == 'disputed':
+                disputes_count = count
-        for ctype, count in stats:
-            if ctype == 'confirmed':
-                confirmations_count = count
-            elif ctype == 'disputed':
-                disputes_count = count
+        for ctype, count in stats:
+            ctype_val = ctype.value if hasattr(ctype, 'value') else ctype
+            if ctype_val == 'confirmed':
+                confirmations_count = count
+            elif ctype_val == 'disputed':
+                disputes_count = count
 
         required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD))
 

diff --git a/backend/routers/admin.py b/backend/routers/admin.py
@@ -42,17 +42,30 @@ def get_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
 def get_system_stats(db: Session = Depends(get_db)):
     """
     Get system-wide user statistics.
-    Optimized: Uses a single aggregate query to calculate multiple metrics simultaneously,
-    reducing database round-trips and scan overhead.
+    Optimized: Standard GROUP BY is measurably faster than multiple func.sum(case(...)) aggregations.
     """
     stats = db.query(
-        func.count(User.id).label("total"),
-        func.sum(case((User.role == UserRole.ADMIN, 1), else_=0)).label("admins"),
-        func.sum(case((User.is_active.is_(True), 1), else_=0)).label("active")
-    ).first()
+        User.role,
+        User.is_active,
+        func.count(User.id)
+    ).group_by(User.role, User.is_active).all()
+
+    total_users = 0
+    admin_count = 0
+    active_users = 0
+
+    for role, is_active, count in stats:
+        total_users += count
+
+        role_val = role.value if hasattr(role, 'value') else role
+        if role_val == 'admin':
+            admin_count += count
+
+        if is_active:
+            active_users += count
 
     return {
-        "total_users": stats.total or 0,
-        "admin_count": int(stats.admins or 0),
-        "active_users": int(stats.active or 0),
+        "total_users": total_users,
+        "admin_count": admin_count,
+        "active_users": active_users,
     }
diff --git a/backend/routers/field_officer.py b/backend/routers/field_officer.py
@@ -437,22 +437,38 @@ def get_visit_statistics(db: Session = Depends(get_db)):
         if cached_json:
             return Response(content=cached_json, media_type="application/json")
 
-        # Optimized: Use a single aggregate query to fetch multiple statistics in one database roundtrip
-        stats = db.query(
-            func.count(FieldOfficerVisit.id).label('total'),
-            func.sum(case((FieldOfficerVisit.verified_at.isnot(None), 1), else_=0)).label('verified'),
-            func.sum(case((FieldOfficerVisit.within_geofence == True, 1), else_=0)).label('within_geofence'),
-            func.sum(case((FieldOfficerVisit.within_geofence == False, 1), else_=0)).label('outside_geofence'),
+        # Optimized: Standard GROUP BY is measurably faster than multiple func.sum(case(...)) aggregations.
+        groups = db.query(
+            FieldOfficerVisit.verified_at.isnot(None).label('is_verified'),
+            FieldOfficerVisit.within_geofence,
+            func.count(FieldOfficerVisit.id)
+        ).group_by(
+            FieldOfficerVisit.verified_at.isnot(None),
+            FieldOfficerVisit.within_geofence
+        ).all()
+
+        # Second query for global aggregates
+        global_stats = db.query(
             func.count(func.distinct(FieldOfficerVisit.officer_email)).label('unique_officers'),
             func.avg(FieldOfficerVisit.distance_from_site).label('avg_distance')
         ).first()
 
-        total_visits = stats.total or 0
-        verified_visits = int(stats.verified or 0)
-        within_geofence_count = int(stats.within_geofence or 0)
-        outside_geofence_count = int(stats.outside_geofence or 0)
-        unique_officers = stats.unique_officers or 0
-        average_distance = stats.avg_distance
+        total_visits = 0
+        verified_visits = 0
+        within_geofence_count = 0
+        outside_geofence_count = 0
+
+        for is_ver, in_geo, count in groups:
+            total_visits += count
+            if is_ver:
+                verified_visits += count
+            if in_geo is True:
+                within_geofence_count += count
+            elif in_geo is False:
+                outside_geofence_count += count
+
+        unique_officers = global_stats.unique_officers or 0
+        average_distance = global_stats.avg_distance
 
         # Round to 2 decimals if not None
         if average_distance is not None:

diff --git a/backend/routers/grievances.py b/backend/routers/grievances.py
@@ -435,14 +435,20 @@ def get_closure_status(
         ).scalar()
 
         # Get all confirmation counts in a single query instead of multiple round-trips
-        from sqlalchemy import case
+        # Optimized: Standard GROUP BY is measurably faster than multiple func.sum(case(...)) aggregations
         stats = db.query(
-            func.sum(case((ClosureConfirmation.confirmation_type == 'confirmed', 1), else_=0)).label('confirmed'),
-            func.sum(case((ClosureConfirmation.confirmation_type == 'disputed', 1), else_=0)).label('disputed')
-        ).filter(ClosureConfirmation.grievance_id == grievance_id).first()
+            ClosureConfirmation.confirmation_type,
+            func.count(ClosureConfirmation.id)
+        ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all()
+
+        confirmations_count = 0
+        disputes_count = 0
 
-        confirmations_count = stats.confirmed or 0
-        disputes_count = stats.disputed or 0
+        for ctype, count in stats:
+            if ctype == 'confirmed':
+                confirmations_count = count
+            elif ctype == 'disputed':
+                disputes_count = count
-        for ctype, count in stats:
-            if ctype == 'confirmed':
-                confirmations_count = count
-            elif ctype == 'disputed':
-                disputes_count = count
+        for ctype, count in stats:
+            ctype_val = ctype.value if hasattr(ctype, 'value') else ctype
+            if ctype_val == 'confirmed':
+                confirmations_count = count
+            elif ctype_val == 'disputed':
+                disputes_count = count
-        for ctype, count in stats:
-            if ctype == 'confirmed':
-                confirmations_count = count
-            elif ctype == 'disputed':
-                disputes_count = count
+        for ctype, count in stats:
+            ctype_val = ctype.value if hasattr(ctype, 'value') else ctype
+            if ctype_val == 'confirmed':
+                confirmations_count = count
+            elif ctype_val == 'disputed':
+                disputes_count = count
 
         required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD))
 

diff --git a/backend/routers/utility.py b/backend/routers/utility.py
@@ -53,25 +53,29 @@ def get_stats(db: Session = Depends(get_db)):
     if cached_stats:
         return Response(content=cached_stats, media_type="application/json")
 
-    # Optimized: Single aggregate query for both category breakdowns and system-wide totals
+    # Optimized: Standard GROUP BY is measurably faster than multiple func.sum(case(...)) aggregations
     # This eliminates a redundant database roundtrip
-    cat_counts = db.query(
+    cat_status_counts = db.query(
         Issue.category,
-        func.count(Issue.id).label("total"),
-        func.sum(case((Issue.status.in_(['resolved', 'verified']), 1), else_=0)).label("resolved")
-    ).group_by(Issue.category).all()
+        Issue.status,
+        func.count(Issue.id)
+    ).group_by(Issue.category, Issue.status).all()
 
     total = 0
     resolved = 0
     issues_by_category = {}
 
-    for cat, cat_total, cat_resolved in cat_counts:
+    for cat, status, count in cat_status_counts:
         # Sum up system-wide totals
-        total += cat_total or 0
-        resolved += int(cat_resolved or 0)
+        total += count
+
+        # Handle resolving logic based on string or enum value
+        status_val = status.value if hasattr(status, 'value') else status
+        if status_val in ['resolved', 'verified']:
+            resolved += count
 
         # Build category breakdown
-        issues_by_category[cat] = cat_total or 0
+        issues_by_category[cat] = issues_by_category.get(cat, 0) + count
 
     # Pending is everything else
     pending = total - resolved

diff --git a/backend/tests/benchmark_closure_status.py b/backend/tests/benchmark_closure_status.py