diff --git a/.jules/bolt.md b/.jules/bolt.md index 8aef41f..8d16c94 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -8,3 +8,6 @@ ## 2024-05-24 - Pre-compile RegExp in nested loops **Learning:** Instantiating `new RegExp()` inside nested array methods like `.filter` and `.some` creates a severe O(N*M) performance bottleneck, especially when matching two large lists (e.g., documented tests vs. actual test files). **Action:** Always pre-compile regular expressions and derived strings into an array of "matcher" objects outside of the loop before iterating, which shifts the instantiation cost from O(N*M) to O(N). +## 2024-05-24 - Pre-compute properties and use Sets instead of nested multi-pass arrays +**Learning:** Performing `basename()` calculations inside nested multi-pass array iterations (`.filter()` and `.some()`) generates substantial O(N*M) bottlenecks. +**Action:** When filtering two lists against each other using comparisons that require calculations (like regex matching or `basename`), pre-compute the target properties for all items, then cross-reference them in one nested `for` loop that records matches in `Set`s, so every pair is tested once and derived strings are never recomputed (the pairwise comparison itself remains O(N*M)). diff --git a/cli/commands/diff.mjs b/cli/commands/diff.mjs index 856eea1..b8c63c3 100644 --- a/cli/commands/diff.mjs +++ b/cli/commands/diff.mjs @@ -346,10 +346,11 @@ function diffTests(dir, config = {}) { if (docTests.size === 0 && codeTests.size === 0) return null; // Glob-aware matching (documented entries are often patterns or basenames). - const codeArr = [...codeTests]; // PERFORMANCE OPTIMIZATION: Pre-compile regular expressions to avoid O(N*M) - // instantiation bottlenecks inside the nested .filter and .some loops below. + // instantiation bottlenecks. 
We also pre-compute the basename of code paths + // and resolve matches in a single cross-check pass instead of multi-pass array + // filtering. Matching is still O(N*M) regex tests, but basename() now runs once per code path. const docMatchers = [...docTests].map(docEntry => { const entry = String(docEntry).trim(); const hasSlash = entry.includes('/'); @@ -363,17 +364,30 @@ function diffTests(dir, config = {}) { }; }); - const matches = (matcher, codeRel) => { - const subject = matcher.hasSlash ? codeRel : basename(codeRel); - return matcher.rx.test(subject); - }; + const codeItems = [...codeTests].map(c => ({ + rel: c, + base: basename(c) + })); + + const matchedDocs = new Set(); + const matchedCode = new Set(); + + for (const matcher of docMatchers) { + for (const code of codeItems) { + const subject = matcher.hasSlash ? code.rel : code.base; + if (matcher.rx.test(subject)) { + matchedDocs.add(matcher.original); + matchedCode.add(code.rel); + } + } + } return { title: 'Test Files', icon: '🧪', - onlyInDocs: docMatchers.filter(m => !codeArr.some(c => matches(m, c))).map(m => m.original), - onlyInCode: codeArr.filter(c => !docMatchers.some(m => matches(m, c))), - matched: docMatchers.filter(m => codeArr.some(c => matches(m, c))).map(m => m.original), + onlyInDocs: docMatchers.filter(m => !matchedDocs.has(m.original)).map(m => m.original), + onlyInCode: codeItems.filter(c => !matchedCode.has(c.rel)).map(c => c.rel), + matched: [...matchedDocs], }; } diff --git a/cli/validators/docs-diff.mjs b/cli/validators/docs-diff.mjs index f9fef2b..4d1b8db 100644 --- a/cli/validators/docs-diff.mjs +++ b/cli/validators/docs-diff.mjs @@ -168,10 +168,11 @@ function diffTests(dir, config) { // bare basenames or full paths. Treat each documented entry as a glob and // match it against code test paths (or basenames when the entry has no slash). // Exact-string comparison produced the false "N documented but not found". 
- const codeArr = [...codeTests]; // PERFORMANCE OPTIMIZATION: Pre-compile regular expressions to avoid O(N*M) - // instantiation bottlenecks inside the nested .filter and .some loops below. + // instantiation bottlenecks. We also pre-compute the basename of code paths + // and resolve matches in a single cross-check pass instead of multi-pass array + // filtering. Matching is still O(N*M) regex tests, but basename() now runs once per code path. const docMatchers = [...docTests].map(docEntry => { const entry = String(docEntry).trim(); const hasSlash = entry.includes('/'); @@ -188,15 +189,28 @@ function diffTests(dir, config) { }; }); - const matches = (matcher, codeRel) => { - const subject = matcher.hasSlash ? codeRel : basename(codeRel); - return matcher.rx.test(subject); - }; + const codeItems = [...codeTests].map(c => ({ + rel: c, + base: basename(c) + })); + + const matchedDocs = new Set(); + const matchedCode = new Set(); + + for (const matcher of docMatchers) { + for (const code of codeItems) { + const subject = matcher.hasSlash ? 
code.rel : code.base; + if (matcher.rx.test(subject)) { + matchedDocs.add(matcher.original); + matchedCode.add(code.rel); + } + } + } return { title: 'Test Files', - onlyInDocs: docMatchers.filter(m => !codeArr.some(c => matches(m, c))).map(m => m.original), - onlyInCode: codeArr.filter(c => !docMatchers.some(m => matches(m, c))), + onlyInDocs: docMatchers.filter(m => !matchedDocs.has(m.original)).map(m => m.original), + onlyInCode: codeItems.filter(c => !matchedCode.has(c.rel)).map(c => c.rel), }; } diff --git a/test-draft.js b/test-draft.js deleted file mode 100644 index 6caa430..0000000 --- a/test-draft.js +++ /dev/null @@ -1,20 +0,0 @@ -import { validateMetricsConsistency } from './cli/validators/metrics-consistency.mjs'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; - -const tmpDir = mkdtempSync(join(tmpdir(), 'docguard-test-')); - -try { - writeFileSync(join(tmpDir, 'README.md'), 'We have 20 checks, 12 validators.'); - - const guardResults = []; - for(let i=0; i<11; i++) { - guardResults.push({ status: 'passed', total: i === 0 ? 5 : 1 }); - } - - const result = validateMetricsConsistency(tmpDir, {}, guardResults); - console.log(result); -} finally { - rmSync(tmpDir, { recursive: true, force: true }); -}