From ce20787c09db16de587d3a3200309f89bd4c4b6f Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 18 May 2026 21:28:21 +0000
Subject: [PATCH] perf: optimize EdgeCaseDetector to prevent O(N*M) kernel
 bottleneck

Co-authored-by: theRebelliousNerd <187437903+theRebelliousNerd@users.noreply.github.com>
---
 fix_test.py                             | 32 ---------
 internal/campaign/edge_case_detector.go | 96 ++++++++++++++++---------
 plan.txt                                | 12 ----
 3 files changed, 61 insertions(+), 79 deletions(-)
 delete mode 100644 fix_test.py
 delete mode 100644 plan.txt

diff --git a/fix_test.py b/fix_test.py
deleted file mode 100644
index d753d6b9..00000000
--- a/fix_test.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import re
-
-with open("tests/e2e/session_clean_loop_integration_test.go", "r") as f:
-    content = f.read()
-
-mock_kernel_code = """
-type mockKernel struct {
-	asserted []types.Fact
-	mu       sync.Mutex
-}
-func (m *mockKernel) LoadFacts(facts []types.Fact) error { return nil }
-func (m *mockKernel) Query(predicate string) ([]types.Fact, error) { return nil, nil }
-func (m *mockKernel) QueryAll() (map[string][]types.Fact, error) { return nil, nil }
-func (m *mockKernel) Assert(fact types.Fact) error {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	m.asserted = append(m.asserted, fact)
-	return nil
-}
-func (m *mockKernel) AssertBatch(facts []types.Fact) error { return nil }
-func (m *mockKernel) Retract(predicate string) error { return nil }
-func (m *mockKernel) RetractFact(fact types.Fact) error { return nil }
-func (m *mockKernel) UpdateSystemFacts() error { return nil }
-func (m *mockKernel) AppendPolicy(policy string) error { return nil }
-func (m *mockKernel) Reset() {}
-"""
-
-# Replace existing mockKernel
-content = re.sub(r'type mockKernel struct \{[\s\S]*?\}[\s\S]*?func \(m \*mockKernel\) Retract\(query string\) error \{ return nil \}', mock_kernel_code, content)
-
-with open("tests/e2e/session_clean_loop_integration_test.go", "w") as f:
-    f.write(content)
diff --git a/internal/campaign/edge_case_detector.go b/internal/campaign/edge_case_detector.go
index 6a84513f..685e8196 100644
--- a/internal/campaign/edge_case_detector.go
+++ b/internal/campaign/edge_case_detector.go
@@ -9,6 +9,7 @@ import (
 	"path/filepath"
 	"sort"
 	"strings"
+	"sync"
 	"time"
 
 	"codenerd/internal/core"
@@ -92,6 +93,12 @@ type SplitSuggestion struct {
 	Reason      string   `json:"reason"`
 }
 
+// kernelFacts caches kernel facts to avoid O(N*M) query costs
+type kernelFacts struct {
+	dependencies []core.Fact
+	complexities []core.Fact
+}
+
 // EdgeCaseDetector analyzes files to determine appropriate actions.
 type EdgeCaseDetector struct {
 	kernel  *core.RealKernel
@@ -160,21 +167,57 @@ func (d *EdgeCaseDetector) AnalyzeFiles(ctx context.Context, paths []string, int
 	timer := logging.StartTimer(logging.CategoryCampaign, "AnalyzeFiles")
 	defer timer.Stop()
 
-	ctx, cancel := context.WithTimeout(ctx, d.config.AnalysisTimeout*time.Duration(len(paths)))
+	// Prevent immediate timeout if paths is empty
+	timeoutDuration := d.config.AnalysisTimeout * time.Duration(len(paths))
+	if len(paths) == 0 {
+		timeoutDuration = d.config.AnalysisTimeout // at least one unit of time
+	}
+	ctx, cancel := context.WithTimeout(ctx, timeoutDuration)
 	defer cancel()
 
+	// Pre-fetch facts to avoid O(N*M) performance penalty
+	var kFacts kernelFacts
+	if d.kernel != nil {
+		kFacts.dependencies, _ = d.kernel.Query("dependency_link")
+		kFacts.complexities, _ = d.kernel.Query("cyclomatic_complexity")
+	}
+
 	decisions := make([]FileDecision, 0, len(paths))
+	decisionCh := make(chan FileDecision, len(paths))
+
+	// Run in parallel with a bounded number of workers
+	var wg sync.WaitGroup
+	sem := make(chan struct{}, 8)
 
 	for _, path := range paths {
-		select {
-		case <-ctx.Done():
-			logging.Campaign("Edge case analysis interrupted: %d/%d files analyzed before timeout", len(decisions), len(paths))
-			return decisions, ctx.Err()
-		default:
-		}
+		wg.Add(1)
+		go func(p string) {
+			defer wg.Done()
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			select {
+			case <-ctx.Done():
+				return
+			default:
+				decisionCh <- d.analyzeFile(ctx, p, intelligence, &kFacts)
+			}
+		}(path)
+	}
+
+	// Wait for all workers in a separate goroutine
+	go func() {
+		wg.Wait()
+		close(decisionCh)
+	}()
 
-		decision := d.analyzeFile(ctx, path, intelligence)
-		decisions = append(decisions, decision)
+	for dec := range decisionCh {
+		decisions = append(decisions, dec)
+	}
+
+	if ctx.Err() != nil {
+		logging.Campaign("Edge case analysis interrupted: %d/%d files analyzed before timeout", len(decisions), len(paths))
+		return decisions, ctx.Err()
 	}
 
 	// Sort by priority: refactor_first > modularize > create > extend > skip
@@ -252,7 +295,7 @@ func (d *EdgeCaseDetector) logDecisionSummary(decisions []FileDecision) {
 }
 
 // analyzeFile performs analysis on a single file.
-func (d *EdgeCaseDetector) analyzeFile(ctx context.Context, path string, intel *IntelligenceReport) FileDecision {
+func (d *EdgeCaseDetector) analyzeFile(ctx context.Context, path string, intel *IntelligenceReport, kFacts *kernelFacts) FileDecision {
 	// Check for context cancellation
 	if err := ctx.Err(); err != nil {
 		return FileDecision{Path: path, RecommendedAction: ActionSkip, Reasoning: "Analysis cancelled"}
@@ -282,7 +325,7 @@ func (d *EdgeCaseDetector) analyzeFile(ctx context.Context, path string, intel *
 	}
 
 	// Gather metrics from intelligence report
-	d.gatherMetrics(&decision, path, intel)
+	d.gatherMetrics(&decision, path, intel, kFacts)
 
 	// Apply decision logic
 	decision.RecommendedAction, decision.Reasoning = d.determineAction(decision)
@@ -294,7 +337,7 @@ func (d *EdgeCaseDetector) analyzeFile(ctx context.Context, path string, intel *
 }
 
 // gatherMetrics populates decision metrics from intelligence data.
-func (d *EdgeCaseDetector) gatherMetrics(decision *FileDecision, path string, intel *IntelligenceReport) {
+func (d *EdgeCaseDetector) gatherMetrics(decision *FileDecision, path string, intel *IntelligenceReport, kFacts *kernelFacts) {
 	if intel == nil {
 		return
 	}
@@ -327,20 +370,14 @@ func (d *EdgeCaseDetector) gatherMetrics(decision *FileDecision, path string, in
 
 	// Query kernel for dependencies
 	if d.kernel != nil {
-		d.queryDependencies(decision, path)
-		d.queryComplexity(decision, path)
+		d.queryDependencies(decision, path, kFacts)
+		d.queryComplexity(decision, path, kFacts)
 	}
 }
 
 // queryDependencies gets file dependencies from the kernel.
-func (d *EdgeCaseDetector) queryDependencies(decision *FileDecision, path string) {
-	// Query dependency_link for dependencies
-	facts, err := d.kernel.Query("dependency_link")
-	if err != nil {
-		return
-	}
-
-	for _, fact := range facts {
+func (d *EdgeCaseDetector) queryDependencies(decision *FileDecision, path string, kFacts *kernelFacts) {
+	for _, fact := range kFacts.dependencies {
 		if len(fact.Args) >= 3 {
 			file := d.parseArg(fact.Args[0])
 			imported := d.parseArg(fact.Args[2])
@@ -359,18 +396,12 @@ func (d *EdgeCaseDetector) queryDependencies(decision *FileDecision, path string
 }
 
 // queryComplexity estimates complexity from kernel facts.
-func (d *EdgeCaseDetector) queryComplexity(decision *FileDecision, path string) {
-	// Query for complexity-related facts
-	facts, err := d.kernel.Query("cyclomatic_complexity")
-	if err != nil {
-		return
-	}
-
+func (d *EdgeCaseDetector) queryComplexity(decision *FileDecision, path string, kFacts *kernelFacts) {
 	var (
 		maxComplexity float64
 		hasComplexity bool
 	)
-	for _, fact := range facts {
+	for _, fact := range kFacts.complexities {
 		if len(fact.Args) >= 3 {
 			file := d.parseArg(fact.Args[0])
 			if d.matchesPath(file, path) {
@@ -820,10 +851,5 @@ func (a *EdgeCaseAnalysis) GetPreworkTasks() []string {
 // Files marked `Exists: true` in intelligence might have been deleted. Should verify
 // against `os.Stat(path)` to prevent invalid `ActionExtend` or `ActionModularize` commands.
 
-// TODO: Missing Edge Case - Performance Vector: Massive volume of facts in kernel.
-// `queryDependencies` and `queryComplexity` executes an O(N) fetch of all facts for *each file*.
-// For campaigns on large repos, this becomes O(N * M) and hangs the orchestrator.
-// Need to parallelize AnalyzeFiles or parameterize kernel queries.
-
 // TODO: Missing Edge Case - Extreme Values: Max file size boundaries.
 // `LineCount` bounds checking should prevent `float64` precision or overflow issues
diff --git a/plan.txt b/plan.txt
deleted file mode 100644
index 3ed3c0b6..00000000
--- a/plan.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-1. In `internal/campaign/context_pager_test.go`, create a new test `TestActivatePhase_MalformedPhaseID`.
-2. Inside `TestActivatePhase_MalformedPhaseID`:
-    a. Initialize `kernel := &MockKernel{}` and `cp := NewContextPager(kernel, &MockLLMClient{}, 100000)`.
-    b. Create a `Phase` with `ID: "malformed id !@#$%\n^&*()"` and some dummy `Name` and `Tasks` with `Artifacts` so that `phase_context_atom` facts are created in the kernel.
-    c. Call `err := cp.ActivatePhase(context.Background(), phase)`.
-    d. Check `if err != nil`.
-    e. Iterate over `kernel.Facts` to find `phase_context_atom` facts.
-    f. Assert that `len(kernel.Facts)` contains the malformed ID exactly as passed. Because `phase.ID` is used directly in `ActivatePhase`, `f.Args[0]` (the first argument to `phase_context_atom`) should match the malformed ID exactly.
-3. Remove the `// TODO: TEST_GAP: User Request Extremes - ActivatePhase with malformed Phase IDs (spaces, special chars) injected into predicates` comment.
-4. Run `go test -v ./internal/campaign/... -run TestActivatePhase_MalformedPhaseID` to verify it passes.
-5. Complete pre-commit steps to ensure proper testing, verification, review, and reflection are done.
-6. Submit the changes.