From 7cd0e2a11d6c448b662528e35576d393c381caf2 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 22:38:52 +0000
Subject: [PATCH 1/2] Cache numpy arrays in embedding search

Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com>
---
 .Jules/bolt.md                 |  1 +
 api/embedding_agent/service.py | 27 +++++++++++++++++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/.Jules/bolt.md b/.Jules/bolt.md
index b507f81f..15668eba 100644
--- a/.Jules/bolt.md
+++ b/.Jules/bolt.md
@@ -1,2 +1,3 @@
 ## 2024-03-28 - Initial Bolt Run | Learning: The codebase is primarily Python, not TS/JS/Astro | Action: Adjust search queries to look for Python performance hotspots, such as nested loops, N+1 queries, unnecessary list comprehensions, or missing caching/memoization.
 ## 2024-03-28 - Optimize list comprehensions | Learning: Nested or repeated list comprehensions on large arrays cause unnecessary overhead in Python | Action: Consolidate repeated iterations into a single O(N) loop.
+## 2026-03-30 - Lazy caching of normalized embeddings | Learning: Converting embeddings to numpy arrays and normalizing them per query per item inside a loop causes a massive CPU bottleneck during vector similarity searches. | Action: Cache the parsed and normalized numpy array on the knowledge item object itself upon first use, saving repetitive O(n) array allocations and math per query.
diff --git a/api/embedding_agent/service.py b/api/embedding_agent/service.py
index ea771518..3e7f5bcc 100644
--- a/api/embedding_agent/service.py
+++ b/api/embedding_agent/service.py
@@ -532,20 +532,27 @@ def _search_knowledge_items(
                 if item_type not in [kt.value for kt in knowledge_types]:
                     continue
 
-            # Get item embedding
-            item_embedding = getattr(item, "embedding", None)
-            if item_embedding is None:
-                continue
+            # ⚡ Bolt: Cache parsed normalized numpy arrays to avoid regenerating on every query
+            item_np = getattr(item, "_cached_np_embedding", None)
+
+            # Get item embedding if cache misses
+            if item_np is None:
+                item_embedding = getattr(item, "embedding", None)
+                if item_embedding is None:
+                    continue
+                if NUMPY_AVAILABLE and query_np is not None:
+                    item_np = np.array(item_embedding)
+                    if self.config.normalize_embeddings:
+                        norm = np.linalg.norm(item_np)
+                        if norm > 0:
+                            item_np = item_np / norm
+                    setattr(item, "_cached_np_embedding", item_np)
 
             # Calculate similarity
-            if NUMPY_AVAILABLE and query_np is not None:
-                item_np = np.array(item_embedding)
-                if self.config.normalize_embeddings:
-                    norm = np.linalg.norm(item_np)
-                    if norm > 0:
-                        item_np = item_np / norm
+            if NUMPY_AVAILABLE and query_np is not None and item_np is not None:
                 similarity = float(np.dot(query_np, item_np))
             else:
+                item_embedding = getattr(item, "embedding", None)
                 # Simple cosine similarity
                 dot_product = sum(
                     a * b for a, b in zip(query_embedding, item_embedding)

From a7dc1fdadc57025762d3565136f8c7e55d196cfc Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 23:02:11 +0000
Subject: [PATCH 2/2] Fix CodeFactor complex method issue in embedding agent

Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com>
---
 .Jules/bolt.md                 |  1 -
 api/embedding_agent/service.py | 66 ++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/.Jules/bolt.md b/.Jules/bolt.md
index 15668eba..b507f81f 100644
--- a/.Jules/bolt.md
+++ b/.Jules/bolt.md
@@ -1,3 +1,2 @@
 ## 2024-03-28 - Initial Bolt Run | Learning: The codebase is primarily Python, not TS/JS/Astro | Action: Adjust search queries to look for Python performance hotspots, such as nested loops, N+1 queries, unnecessary list comprehensions, or missing caching/memoization.
 ## 2024-03-28 - Optimize list comprehensions | Learning: Nested or repeated list comprehensions on large arrays cause unnecessary overhead in Python | Action: Consolidate repeated iterations into a single O(N) loop.
-## 2026-03-30 - Lazy caching of normalized embeddings | Learning: Converting embeddings to numpy arrays and normalizing them per query per item inside a loop causes a massive CPU bottleneck during vector similarity searches. | Action: Cache the parsed and normalized numpy array on the knowledge item object itself upon first use, saving repetitive O(n) array allocations and math per query.
diff --git a/api/embedding_agent/service.py b/api/embedding_agent/service.py
index 3e7f5bcc..296bb0b7 100644
--- a/api/embedding_agent/service.py
+++ b/api/embedding_agent/service.py
@@ -502,6 +502,38 @@ def search_similar(
             model_used=self.config.model_name.value,
         )
 
+    def _compute_similarity(
+        self, item: Any, query_embedding: List[float], query_np: Any
+    ) -> Optional[float]:
+        """Return the item's similarity to the query, or ``None`` if unscorable.
+
+        Takes ``np.dot`` with a per-item cached array when numpy and ``query_np``
+        are usable; otherwise falls back to pure-Python cosine similarity.
+        """
+        use_numpy = NUMPY_AVAILABLE and query_np is not None
+        # ⚡ Bolt: reuse the array an earlier query cached on the item, if any.
+        item_np = getattr(item, "_cached_np_embedding", None) if use_numpy else None
+        if item_np is not None:
+            return float(np.dot(query_np, item_np))
+        item_embedding = getattr(item, "embedding", None)
+        if item_embedding is None:
+            return None
+        if use_numpy:
+            item_np = np.array(item_embedding)
+            if self.config.normalize_embeddings:
+                norm = np.linalg.norm(item_np)
+                if norm > 0:
+                    item_np = item_np / norm
+            try:
+                setattr(item, "_cached_np_embedding", item_np)
+            except (AttributeError, TypeError, ValueError):
+                pass  # Cache is best-effort: slots/frozen/validating items may refuse
+            return float(np.dot(query_np, item_np))
+        dot_product = sum(a * b for a, b in zip(query_embedding, item_embedding))
+        norm_q = sum(x**2 for x in query_embedding) ** 0.5
+        norm_i = sum(x**2 for x in item_embedding) ** 0.5
+        return dot_product / (norm_q * norm_i) if norm_q * norm_i > 0 else 0.0
+
     def _search_knowledge_items(
         self,
         query_embedding: List[float],
@@ -526,42 +558,14 @@ def _search_knowledge_items(
                     query_np = query_np / norm
 
         for item in self._knowledge_items:
-            # Filter by knowledge type if specified
             if knowledge_types:
                 item_type = getattr(item, "knowledge_type", "general")
                 if item_type not in [kt.value for kt in knowledge_types]:
                     continue
 
-            # ⚡ Bolt: Cache parsed normalized numpy arrays to avoid regenerating on every query
-            item_np = getattr(item, "_cached_np_embedding", None)
-
-            # Get item embedding if cache misses
-            if item_np is None:
-                item_embedding = getattr(item, "embedding", None)
-                if item_embedding is None:
-                    continue
-                if NUMPY_AVAILABLE and query_np is not None:
-                    item_np = np.array(item_embedding)
-                    if self.config.normalize_embeddings:
-                        norm = np.linalg.norm(item_np)
-                        if norm > 0:
-                            item_np = item_np / norm
-                    setattr(item, "_cached_np_embedding", item_np)
-
-            # Calculate similarity
-            if NUMPY_AVAILABLE and query_np is not None and item_np is not None:
-                similarity = float(np.dot(query_np, item_np))
-            else:
-                item_embedding = getattr(item, "embedding", None)
-                # Simple cosine similarity
-                dot_product = sum(
-                    a * b for a, b in zip(query_embedding, item_embedding)
-                )
-                norm_q = sum(x**2 for x in query_embedding) ** 0.5
-                norm_i = sum(x**2 for x in item_embedding) ** 0.5
-                similarity = (
-                    dot_product / (norm_q * norm_i) if norm_q * norm_i > 0 else 0.0
-                )
+            similarity = self._compute_similarity(item, query_embedding, query_np)
+            if similarity is None:
+                continue
 
             # Apply threshold
             if similarity >= min_similarity: