From 7cd0e2a11d6c448b662528e35576d393c381caf2 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:38:52 +0000 Subject: [PATCH 1/2] Cache numpy arrays in embedding search Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com> --- .Jules/bolt.md | 1 + api/embedding_agent/service.py | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/.Jules/bolt.md b/.Jules/bolt.md index b507f81f..15668eba 100644 --- a/.Jules/bolt.md +++ b/.Jules/bolt.md @@ -1,2 +1,3 @@ ## 2024-03-28 - Initial Bolt Run | Learning: The codebase is primarily Python, not TS/JS/Astro | Action: Adjust search queries to look for Python performance hotspots, such as nested loops, N+1 queries, unnecessary list comprehensions, or missing caching/memoization. ## 2024-03-28 - Optimize list comprehensions | Learning: Nested or repeated list comprehensions on large arrays cause unnecessary overhead in Python | Action: Consolidate repeated iterations into a single O(N) loop. +## 2026-03-30 - Lazy caching of normalized embeddings | Learning: Converting embeddings to numpy arrays and normalizing them per query per item inside a loop causes a massive CPU bottleneck during vector similarity searches. | Action: Cache the parsed and normalized numpy array on the knowledge item object itself upon first use, saving repetitive O(n) array allocations and math per query. diff --git a/api/embedding_agent/service.py b/api/embedding_agent/service.py index ea771518..3e7f5bcc 100644 --- a/api/embedding_agent/service.py +++ b/api/embedding_agent/service.py @@ -532,20 +532,27 @@ def _search_knowledge_items( if item_type not in [kt.value for kt in knowledge_types]: continue - # Get item embedding - item_embedding = getattr(item, "embedding", None) - if item_embedding is None: - continue + # ⚡ Bolt: Cache parsed normalized numpy arrays to avoid regenerating on every query + item_np = getattr(item, "_cached_np_embedding", None) + + # Get item embedding if cache misses + if item_np is None: + item_embedding = getattr(item, "embedding", None) + if item_embedding is None: + continue + if NUMPY_AVAILABLE and query_np is not None: + item_np = np.array(item_embedding) + if self.config.normalize_embeddings: + norm = np.linalg.norm(item_np) + if norm > 0: + item_np = item_np / norm + setattr(item, "_cached_np_embedding", item_np) # Calculate similarity - if NUMPY_AVAILABLE and query_np is not None: - item_np = np.array(item_embedding) - if self.config.normalize_embeddings: - norm = np.linalg.norm(item_np) - if norm > 0: - item_np = item_np / norm + if NUMPY_AVAILABLE and query_np is not None and item_np is not None: similarity = float(np.dot(query_np, item_np)) else: + item_embedding = getattr(item, "embedding", None) # Simple cosine similarity dot_product = sum( a * b for a, b in zip(query_embedding, item_embedding) From a7dc1fdadc57025762d3565136f8c7e55d196cfc Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 30 Mar 2026 23:02:11 +0000 Subject: [PATCH 2/2] Fix CodeFactor complex method issue in embedding agent Co-authored-by: daggerstuff <261005129+daggerstuff@users.noreply.github.com> --- .Jules/bolt.md | 1 - api/embedding_agent/service.py | 66 ++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/.Jules/bolt.md b/.Jules/bolt.md index 15668eba..b507f81f 100644 --- a/.Jules/bolt.md +++ b/.Jules/bolt.md @@ -1,3 +1,2 @@ ## 2024-03-28 - Initial Bolt Run | Learning: The codebase is primarily Python, not TS/JS/Astro | Action: Adjust search queries to look for Python performance hotspots, such as nested loops, N+1 queries, unnecessary list comprehensions, or missing caching/memoization. ## 2024-03-28 - Optimize list comprehensions | Learning: Nested or repeated list comprehensions on large arrays cause unnecessary overhead in Python | Action: Consolidate repeated iterations into a single O(N) loop. -## 2026-03-30 - Lazy caching of normalized embeddings | Learning: Converting embeddings to numpy arrays and normalizing them per query per item inside a loop causes a massive CPU bottleneck during vector similarity searches. | Action: Cache the parsed and normalized numpy array on the knowledge item object itself upon first use, saving repetitive O(n) array allocations and math per query. diff --git a/api/embedding_agent/service.py b/api/embedding_agent/service.py index 3e7f5bcc..296bb0b7 100644 --- a/api/embedding_agent/service.py +++ b/api/embedding_agent/service.py @@ -502,6 +502,38 @@ def search_similar( model_used=self.config.model_name.value, ) + def _compute_similarity( + self, item: Any, query_embedding: List[float], query_np: Any + ) -> Optional[float]: + # ⚡ Bolt: Cache normalized numpy arrays safely to avoid regenerating on query + item_np = getattr(item, "_cached_np_embedding", None) + + if item_np is None: + item_embedding = getattr(item, "embedding", None) + if item_embedding is None: + return None + if NUMPY_AVAILABLE and query_np is not None: + item_np = np.array(item_embedding) + if self.config.normalize_embeddings: + norm = np.linalg.norm(item_np) + if norm > 0: + item_np = item_np / norm + try: + setattr(item, "_cached_np_embedding", item_np) + except AttributeError: + pass # Skip caching if item is a strict class (e.g., uses slots) + + if NUMPY_AVAILABLE and query_np is not None and item_np is not None: + return float(np.dot(query_np, item_np)) + + item_embedding = getattr(item, "embedding", None) + if item_embedding is None: + return None + dot_product = sum(a * b for a, b in zip(query_embedding, item_embedding)) + norm_q = sum(x**2 for x in query_embedding) ** 0.5 + norm_i = sum(x**2 for x in item_embedding) ** 0.5 + return dot_product / (norm_q * norm_i) if norm_q * norm_i > 0 else 0.0 + def _search_knowledge_items( self, query_embedding: List[float], @@ -526,42 +558,14 @@ def _search_knowledge_items( query_np = query_np / norm for item in self._knowledge_items: - # Filter by knowledge type if specified if knowledge_types: item_type = getattr(item, "knowledge_type", "general") if item_type not in [kt.value for kt in knowledge_types]: continue - # ⚡ Bolt: Cache parsed normalized numpy arrays to avoid regenerating on every query - item_np = getattr(item, "_cached_np_embedding", None) - - # Get item embedding if cache misses - if item_np is None: - item_embedding = getattr(item, "embedding", None) - if item_embedding is None: - continue - if NUMPY_AVAILABLE and query_np is not None: - item_np = np.array(item_embedding) - if self.config.normalize_embeddings: - norm = np.linalg.norm(item_np) - if norm > 0: - item_np = item_np / norm - setattr(item, "_cached_np_embedding", item_np) - - # Calculate similarity - if NUMPY_AVAILABLE and query_np is not None and item_np is not None: - similarity = float(np.dot(query_np, item_np)) - else: - item_embedding = getattr(item, "embedding", None) - # Simple cosine similarity - dot_product = sum( - a * b for a, b in zip(query_embedding, item_embedding) - ) - norm_q = sum(x**2 for x in query_embedding) ** 0.5 - norm_i = sum(x**2 for x in item_embedding) ** 0.5 - similarity = ( - dot_product / (norm_q * norm_i) if norm_q * norm_i > 0 else 0.0 - ) + similarity = self._compute_similarity(item, query_embedding, query_np) + if similarity is None: + continue # Apply threshold if similarity >= min_similarity: