diff --git a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts
index ef2287926..f237abd89 100644
--- a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts
+++ b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts
@@ -251,4 +251,24 @@ describe("buildLexicalMatches / buildAiCandidateMatches", () => {
     const candidates = buildAiCandidateMatches(index, "qqzznomatch");
     expect(candidates.map((m) => m.digest)).toEqual(["alpha", "zebra"]);
   });
+
+  it("adds source-diverse candidates beyond the top lexical hits", () => {
+    const broadIndex = buildSearchIndex([
+      ...Array.from({ length: 100 }, (_, i) => ({
+        reference: ref(`train-${i}`, `train_${i}`),
+        source: source("standard"),
+      })),
+      {
+        reference: ref("user-upload", "upload_file"),
+        source: USER_SOURCE,
+      },
+    ]);
+
+    const candidates = buildAiCandidateMatches(broadIndex, "train");
+
+    expect(candidates).toHaveLength(80);
+    expect(candidates.map((candidate) => candidate.digest)).toContain(
+      "user-upload",
+    );
+  });
 });
diff --git a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts
index e8baa1e90..462cbd1f4 100644
--- a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts
+++ b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts
@@ -29,9 +29,12 @@ import type {
 /** How many lexical hits to display before the user asks for AI judgment. */
 const LEXICAL_RESULT_LIMIT = 50;
 
-// Candidate pool sent to AI rerank on click. Matches LEXICAL_RESULT_LIMIT so
-// every displayed result is scored and can show a relevance percentage.
-const AI_CANDIDATE_LIMIT = 50;
+// Upper bound on the candidate pool built for AI rerank.
+const AI_CANDIDATE_LIMIT = 80;
+// Lexical hits admitted first; the remaining slots go to the browse samples.
+const AI_LEXICAL_CANDIDATE_LIMIT = 60;
+// Entries sampled from each source when topping up the pool for diversity.
+const AI_SOURCE_DIVERSITY_CANDIDATES_PER_SOURCE = 8;
 // Scores at or below this are treated as the model excluding a candidate: such
 // items keep their place in the list but are not badged as relevance matches.
 const RERANK_EXCLUSION_THRESHOLD = 0.01;
@@ -281,10 +284,71 @@ export function buildLexicalMatches(
   });
 }
 
+/**
+ * Picks at most `limit` items spread evenly across `items`, preserving order.
+ * Returns `items` itself when it already fits within `limit`.
+ */
+function sampleEvenly<T>(items: T[], limit: number): T[] {
+  if (items.length <= limit) return items;
+  const step = items.length / limit;
+  return Array.from(
+    { length: limit },
+    (_, index) => items[Math.floor(index * step)],
+  );
+}
+
+/**
+ * Appends matches whose digest has not been seen yet, recording each digest in
+ * `seenDigests`. Never grows `target` beyond AI_CANDIDATE_LIMIT, even when it
+ * is already full on entry.
+ */
+function appendUniqueMatches(
+  target: LexicalMatch[],
+  seenDigests: Set<string>,
+  matches: LexicalMatch[],
+) {
+  for (const match of matches) {
+    // Check capacity before pushing so a full pool never gains an extra item.
+    if (target.length >= AI_CANDIDATE_LIMIT) return;
+    if (seenDigests.has(match.digest)) continue;
+    seenDigests.add(match.digest);
+    target.push(match);
+  }
+}
+
+/**
+ * Browse sample covering every source: groups the index by source kind and
+ * id, sorts each group by name, and takes an even sample from each group.
+ */
+function buildSourceDiverseBrowseMatches(index: IndexEntry[]): LexicalMatch[] {
+  const bySource = new Map<string, IndexEntry[]>();
+  for (const entry of index) {
+    const key = `${entry.source.kind}:${entry.source.id}`;
+    const group = bySource.get(key);
+    // Push in place; re-spreading the group per entry is quadratic.
+    if (group) group.push(entry);
+    else bySource.set(key, [entry]);
+  }
+
+  const matches: LexicalMatch[] = [];
+  for (const entries of bySource.values()) {
+    const sorted = [...entries].sort((a, b) => a.name.localeCompare(b.name));
+    matches.push(
+      ...sampleEvenly(sorted, AI_SOURCE_DIVERSITY_CANDIDATES_PER_SOURCE).map(
+        indexEntryToLexicalMatch,
+      ),
+    );
+  }
+
+  return matches;
+}
+
 /**
- * Bounded candidate pool for AI rerank. Prefers broad lexical hits; when
- * literal matching finds nothing it falls back to an alphabetical browse slice
- * so natural-language queries stay useful.
+ * Bounded candidate pool for AI rerank. Starts with the strongest lexical hits,
+ * then adds a source-diverse browse sample so AI can rescue plausible matches
+ * that literal scoring missed, and finally tops up with an even alphabetical
+ * sample of the whole index. Holds at most AI_CANDIDATE_LIMIT matches with
+ * distinct digests; an empty query yields an empty pool.
  */
 export function buildAiCandidateMatches(
   index: IndexEntry[],
@@ -292,16 +356,32 @@ export function buildAiCandidateMatches(
 ): LexicalMatch[] {
   if (trimmedQuery.length === 0) return [];
 
-  const broadMatches = lexicalSearch(index, trimmedQuery, {
-    limit: AI_CANDIDATE_LIMIT,
-    minLength: 1,
-  });
-  if (broadMatches.length > 0) return broadMatches;
+  const candidates: LexicalMatch[] = [];
+  const seenDigests = new Set<string>();
+
+  appendUniqueMatches(
+    candidates,
+    seenDigests,
+    lexicalSearch(index, trimmedQuery, {
+      limit: AI_LEXICAL_CANDIDATE_LIMIT,
+      minLength: 1,
+    }),
+  );
+
+  appendUniqueMatches(
+    candidates,
+    seenDigests,
+    buildSourceDiverseBrowseMatches(index),
+  );
+
+  const sortedIndex = [...index].sort((a, b) => a.name.localeCompare(b.name));
+  appendUniqueMatches(
+    candidates,
+    seenDigests,
+    sampleEvenly(sortedIndex, AI_CANDIDATE_LIMIT).map(indexEntryToLexicalMatch),
+  );
 
-  return [...index]
-    .sort((a, b) => a.name.localeCompare(b.name))
-    .slice(0, AI_CANDIDATE_LIMIT)
-    .map(indexEntryToLexicalMatch);
+  return candidates;
 }
 
 export function buildRerankScoreByDigest(
diff --git a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts
index 76665505e..c318af196 100644
--- a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts
+++ b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts
@@ -215,9 +215,7 @@ export function useComponentSearchV2State(
 
     if (candidates.length === 0) return;
 
-    setRerankBaseMatches(
-      lexicalMatches.length > 0 ? lexicalMatches : aiCandidateMatches,
-    );
+    setRerankBaseMatches(aiCandidateMatches);
     setRerankedFor(trimmedQuery);
     // Score every candidate so each displayed result shows a relevance %.
     mutate({ query: trimmedQuery, candidates, scoreAllCandidates: true });