Skip to content

Commit 47cd845

Browse files
committed
Address second round of PR #113 review comments
- regex.ts: raise branch length threshold from >= 1 to >= 3 in extractApiTerm — short branches like /a|bc/ now fall through to the longest-literal path which enforces the existing '< 3 chars → warn' rule, preventing extremely broad API queries such as 'a OR bc' - aggregate.ts: align regexFilter type with buildApiQuery return value; accept RegExp | null | undefined (via RegExp | null optional param) and update guard from !== undefined to != null so callers can pass the result of buildApiQuery directly without a null→undefined conversion - regex.test.ts: add test for short-branch alternation fallback (/a|bc/) - aggregate.test.ts: add test for null regexFilter treated as no-op
1 parent 5264a37 commit 47cd845

4 files changed

Lines changed: 26 additions & 5 deletions

File tree

src/aggregate.test.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,16 @@ describe("aggregate — regexFilter", () => {
195195
expect(groups).toHaveLength(2);
196196
});
197197

198+
it("keeps all matches when regexFilter is null (backward compat — null treated as no filter)", () => {
199+
const matches: CodeMatch[] = [
200+
makeMatchWithFragments("myorg/repoA", "src/a.ts", ["const x = 1"]),
201+
makeMatchWithFragments("myorg/repoB", "src/b.ts", ["const y = 2"]),
202+
];
203+
204+
const groups = aggregate(matches, new Set(), new Set(), false, null);
205+
expect(groups).toHaveLength(2);
206+
});
207+
198208
it("respects regex flags (case-insensitive)", () => {
199209
const matches: CodeMatch[] = [
200210
makeMatchWithFragments("myorg/repoA", "src/a.ts", ["import AXIOS from 'axios'"]),

src/aggregate.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ export function aggregate(
3434
excludedRepos: Set<string>,
3535
excludedExtractRefs: Set<string>,
3636
includeArchived = false,
37-
regexFilter?: RegExp,
37+
regexFilter?: RegExp | null,
3838
): RepoGroup[] {
3939
const map = new Map<string, CodeMatch[]>();
4040
for (const m of matches) {
4141
if (excludedRepos.has(m.repoFullName)) continue;
4242
if (!includeArchived && m.archived) continue;
4343
// Fix: when a regex filter is active, only keep matches where at least one
4444
// text_match fragment satisfies the pattern — see issue #111
45-
if (regexFilter !== undefined) {
45+
if (regexFilter != null) {
4646
const hasMatch = m.textMatches.some((tm) => {
4747
// Fix: reset lastIndex before each call — a global/sticky regex is
4848
// stateful and would produce false negatives on subsequent fragments.

src/regex.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ describe("buildApiQuery — top-level alternation → OR", () => {
6969
expect(r.warn).toBeUndefined();
7070
});
7171

72+
it("/a|bc/ — short branches (< 3 chars each) fall back to longest literal and warn", () => {
73+
// branches: "a" (1 char) and "bc" (2 chars) — both < 3 → fall back to
74+
// longestLiteralSequence("a|bc") → "bc" (2 chars) < 3 → warn + empty term
75+
const r = buildApiQuery("/a|bc/");
76+
expect(r.warn).toBeDefined();
77+
expect(r.apiQuery).toBe("");
78+
expect(r.regexFilter).not.toBeNull();
79+
});
80+
7281
it("/\\\\|foo/ — escaped backslash before | → | is top-level → falls back to longest literal 'foo'", () => {
7382
// Pattern \\|foo: \\ is an escaped backslash (matches literal \), | is top-level.
7483
// splitTopLevelAlternation gives ["\\", "foo"]; "\\" yields no useful literal

src/regex.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,12 @@ function extractApiTerm(pattern: string): { term: string; warn?: string } {
108108
// 1. Top-level alternation detection.
109109
const branches = splitTopLevelAlternation(pattern);
110110
if (branches.length > 1) {
111-
// Each branch must yield a meaningful literal to use the OR strategy;
112-
// otherwise fall through to the longest-literal approach.
111+
// Each branch must yield a meaningful literal (>= 3 chars) to use the OR
112+
// strategy — the same minimum enforced by the single-literal path below.
113+
// Branches shorter than 3 chars (e.g. /a|bc/) fall through so the global
114+
// "< 3 chars → warn + empty term" rule still applies.
113115
const branchTerms = branches.map((b) => longestLiteralSequence(b));
114-
if (branchTerms.every((t) => t.length >= 1)) {
116+
if (branchTerms.every((t) => t.length >= 3)) {
115117
return { term: branchTerms.join(" OR ") };
116118
}
117119
}

0 commit comments

Comments
 (0)