From 817441b6e370bf732f833ace441542543053f983 Mon Sep 17 00:00:00 2001 From: mbeaulne Date: Thu, 18 Jun 2026 13:48:53 -0400 Subject: [PATCH] Improve AI rerank payload for component search --- .../Dashboard/DashboardComponentsV2View.tsx | 51 +++++++++++++-- .../components/ComponentSearchV2Content.tsx | 22 ++++++- .../components/componentSearchV2Logic.test.ts | 20 ++++++ .../components/componentSearchV2Logic.ts | 42 ++++++++++++- .../Editor/hooks/useComponentSearchV2State.ts | 38 ++++++++--- ...uralLanguageComponentSearchService.test.ts | 43 +++++++++++-- .../naturalLanguageComponentSearchService.ts | 63 ++++++++++++++++--- 7 files changed, 246 insertions(+), 33 deletions(-) diff --git a/src/routes/Dashboard/DashboardComponentsV2View.tsx b/src/routes/Dashboard/DashboardComponentsV2View.tsx index 73758ea84..e8d1c2cb6 100644 --- a/src/routes/Dashboard/DashboardComponentsV2View.tsx +++ b/src/routes/Dashboard/DashboardComponentsV2View.tsx @@ -734,6 +734,26 @@ export const DashboardComponentsV2View = () => { ); })(); + const deepAiCandidateMatches: LexicalMatch[] = (() => { + if (trimmedQuery.length === 0) return []; + + const candidates: LexicalMatch[] = []; + const seenDigests = new Set(); + const allLexicalMatches = lexicalSearch(filteredIndex, query, { + limit: filteredIndex.length, + }); + for (const match of allLexicalMatches) { + seenDigests.add(match.digest); + candidates.push(match); + } + for (const entry of sortedIndex) { + if (seenDigests.has(entry.digest)) continue; + seenDigests.add(entry.digest); + candidates.push(indexEntryToLexicalMatch(entry)); + } + return candidates; + })(); + const { mutate: rerank, data: rerankData, @@ -760,21 +780,29 @@ export const DashboardComponentsV2View = () => { } }; - const handleSmartSearch = () => { + const startAiSearch = (matches: LexicalMatch[]) => { const trimmed = query.trim(); - if (trimmed.length === 0 || aiCandidateMatches.length === 0) return; + if (trimmed.length === 0 || matches.length === 0) return; - const candidates = aiCandidateMatches - .map((m) => componentReferenceToCandidate(m.reference)) + const candidates = matches + .map((m) => componentReferenceToCandidate(m.reference, m.source)) .filter((c): c is NonNullable => c !== null); if (candidates.length === 0) return; - setRerankBaseMatches(aiCandidateMatches); + setRerankBaseMatches(matches); setRerankedFor(trimmed); rerank({ query: trimmed, candidates }); }; + const handleSmartSearch = () => { + startAiSearch(aiCandidateMatches); + }; + + const handleDeepAiSearch = () => { + startAiSearch(deepAiCandidateMatches); + }; + const handleSourceToggle = (sourceKey: string) => { setDisabledSourceKeys((current) => current.includes(sourceKey) @@ -1014,6 +1042,19 @@ export const DashboardComponentsV2View = () => { > {isReranking ? : } + ) => { setQuery(event.target.value); @@ -73,6 +81,16 @@ export function ComponentSearchV2Content() { > {isReranking ? : } + { it("returns no AI candidates for an empty query", () => { expect(buildAiCandidateMatches(index, "")).toEqual([]); + expect(buildDeepAiCandidateMatches(index, "")).toEqual([]); }); it("falls back to a browse pool when literal search finds nothing", () => { @@ -271,4 +273,22 @@ describe("buildLexicalMatches / buildAiCandidateMatches", () => { "user-upload", ); }); + + it("builds deep AI candidates from all searchable components", () => { + const broadIndex = buildSearchIndex([ + ...Array.from({ length: 100 }, (_, i) => ({ + reference: ref(`train-${i}`, `train_${i}`), + source: source("standard"), + })), + { reference: ref("z-upload", "upload_file"), source: USER_SOURCE }, + ]); + + const candidates = buildDeepAiCandidateMatches(broadIndex, "train"); + + expect(candidates).toHaveLength(101); + expect(candidates.at(0)?.digest).toBe("train-0"); + expect(candidates.map((candidate) => candidate.digest)).toContain( + "z-upload", + ); + }); }); diff --git a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts index 462cbd1f4..c9bb0640d 100644 --- a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts +++ b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts @@ -69,8 +69,10 @@ export interface ComponentSearchV2State { browseFolders: UIComponentFolder[]; isLoading: boolean; canRerank: boolean; + canDeepRerank: boolean; isReranking: boolean; rerank: () => void; + deepRerank: () => void; } export function registeredSource( @@ -294,12 +296,13 @@ function appendUniqueMatches( target: LexicalMatch[], seenDigests: Set, matches: LexicalMatch[], + limit: number, ) { for (const match of matches) { if (seenDigests.has(match.digest)) continue; seenDigests.add(match.digest); target.push(match); - if (target.length >= AI_CANDIDATE_LIMIT) return; + if (target.length >= limit) return; } } @@ -344,12 +347,14 @@ export function buildAiCandidateMatches( limit: AI_LEXICAL_CANDIDATE_LIMIT, minLength: 1, }), + AI_CANDIDATE_LIMIT, ); appendUniqueMatches( candidates, seenDigests, buildSourceDiverseBrowseMatches(index), + AI_CANDIDATE_LIMIT, ); const sortedIndex = [...index].sort((a, b) => a.name.localeCompare(b.name)); @@ -357,6 +362,41 @@ export function buildAiCandidateMatches( candidates, seenDigests, sampleEvenly(sortedIndex, AI_CANDIDATE_LIMIT).map(indexEntryToLexicalMatch), + AI_CANDIDATE_LIMIT, + ); + + return candidates; +} + +/** + * Explicit deep AI search candidate pool. Sends every searchable component, + * ordered with lexical hits first so truncating providers still see likely + * matches early. + */ +export function buildDeepAiCandidateMatches( + index: IndexEntry[], + trimmedQuery: string, +): LexicalMatch[] { + if (trimmedQuery.length === 0) return []; + + const candidates: LexicalMatch[] = []; + const seenDigests = new Set(); + appendUniqueMatches( + candidates, + seenDigests, + lexicalSearch(index, trimmedQuery, { + limit: index.length, + minLength: 1, + }), + Number.MAX_SAFE_INTEGER, + ); + + const sortedIndex = [...index].sort((a, b) => a.name.localeCompare(b.name)); + appendUniqueMatches( + candidates, + seenDigests, + sortedIndex.map(indexEntryToLexicalMatch), + Number.MAX_SAFE_INTEGER, ); return candidates; diff --git a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts index c318af196..af8165193 100644 --- a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts +++ b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts @@ -18,6 +18,7 @@ import { } from "@/providers/ComponentLibraryProvider/libraries/storage"; import { buildAiCandidateMatches, + buildDeepAiCandidateMatches, buildLexicalMatches, buildRerankScoreByDigest, buildResultFolders, @@ -175,6 +176,10 @@ export function useComponentSearchV2State( const trimmedQuery = query.trim(); const lexicalMatches = buildLexicalMatches(index, trimmedQuery); const aiCandidateMatches = buildAiCandidateMatches(index, trimmedQuery); + const deepAiCandidateMatches = buildDeepAiCandidateMatches( + index, + trimmedQuery, + ); const { mutate, @@ -202,23 +207,33 @@ export function useComponentSearchV2State( ? rerankedMatches(rerankData, rerankBaseMatches) : lexicalMatches; - const rerank = () => { - if (!trimmedQuery || aiCandidateMatches.length === 0 || !isConfigured) { - return; - } + const startRerank = ( + matches: LexicalMatch[], + { scoreAllCandidates }: { scoreAllCandidates: boolean }, + ) => { + if (!trimmedQuery || matches.length === 0 || !isConfigured) return; - const candidates = aiCandidateMatches - .map((match) => componentReferenceToCandidate(match.reference)) + const candidates = matches + .map((match) => + componentReferenceToCandidate(match.reference, match.source), + ) .filter((candidate): candidate is NonNullable => Boolean(candidate), ); if (candidates.length === 0) return; - setRerankBaseMatches(aiCandidateMatches); + setRerankBaseMatches(matches); setRerankedFor(trimmedQuery); - // Score every candidate so each displayed result shows a relevance %. - mutate({ query: trimmedQuery, candidates, scoreAllCandidates: true }); + mutate({ query: trimmedQuery, candidates, scoreAllCandidates }); + }; + + const rerank = () => { + startRerank(aiCandidateMatches, { scoreAllCandidates: true }); + }; + + const deepRerank = () => { + startRerank(deepAiCandidateMatches, { scoreAllCandidates: false }); }; const rerankScoreByDigest = buildRerankScoreByDigest( @@ -243,7 +258,12 @@ export function useComponentSearchV2State( isHydrating, canRerank: trimmedQuery.length > 0 && aiCandidateMatches.length > 0 && isConfigured, + canDeepRerank: + trimmedQuery.length > 0 && + deepAiCandidateMatches.length > 0 && + isConfigured, isReranking, rerank, + deepRerank, }; } diff --git a/src/services/naturalLanguageComponentSearchService.test.ts b/src/services/naturalLanguageComponentSearchService.test.ts index 0d51f3a33..7b604d6bc 100644 --- a/src/services/naturalLanguageComponentSearchService.test.ts +++ b/src/services/naturalLanguageComponentSearchService.test.ts @@ -90,23 +90,54 @@ describe("componentReferenceToCandidate", () => { }); }); - it("includes input/output names when present", () => { + it("includes input/output types, descriptions, and source when present", () => { const ref: ComponentReference = { digest: "abc", spec: { name: "train", description: "", - inputs: [{ name: "dataset" }], - outputs: [{ name: "model" }], + inputs: [ + { + name: "dataset", + type: "Dataset", + description: "Training data", + }, + ], + outputs: [ + { + name: "model", + type: { Model: { format: "xgboost" } }, + description: "Trained model", + }, + ], implementation: { container: { image: "x" } }, }, }; - expect(componentReferenceToCandidate(ref)).toEqual({ + expect( + componentReferenceToCandidate(ref, { + kind: "published", + label: "Published", + id: "published", + }), + ).toEqual({ id: "abc", name: "train", description: "", - inputs: ["dataset"], - outputs: ["model"], + source: { kind: "published", label: "Published" }, + inputs: [ + { + name: "dataset", + type: "Dataset", + description: "Training data", + }, + ], + outputs: [ + { + name: "model", + type: '{"Model":{"format":"xgboost"}}', + description: "Trained model", + }, + ], }); }); }); diff --git a/src/services/naturalLanguageComponentSearchService.ts b/src/services/naturalLanguageComponentSearchService.ts index 18192928a..baaf3bb93 100644 --- a/src/services/naturalLanguageComponentSearchService.ts +++ b/src/services/naturalLanguageComponentSearchService.ts @@ -11,24 +11,42 @@ * judgment over a small, well-defined list when literal matching is not enough. */ -import type { ComponentReference } from "@/utils/componentSpec"; +import type { + ComponentReference, + InputSpec, + OutputSpec, +} from "@/utils/componentSpec"; import { getComponentName } from "@/utils/getComponentName"; import { isRecord } from "@/utils/typeGuards"; -import { extractComponentMetadata } from "./componentSearchIndex"; +import { + type ComponentSearchSource, + extractComponentMetadata, +} from "./componentSearchIndex"; /** * Compact candidate shape sent to the model. Only the fields that inform - * judgment: name, description, i/o names. Implementation/command text is - * already covered by the lexical layer and would just inflate the prompt. + * judgment: name, description, source, and i/o summaries. Implementation/ + * command text is already covered by the lexical layer and would just inflate + * the prompt. */ +interface RerankCandidateIO { + name: string; + type?: string; + description?: string; +} + export interface RerankCandidate { /** Component digest. Used to round-trip the model's response to references. */ id: string; name: string; description: string; - inputs?: string[]; - outputs?: string[]; + source?: { + kind: ComponentSearchSource["kind"]; + label: string; + }; + inputs?: RerankCandidateIO[]; + outputs?: RerankCandidateIO[]; } export interface RerankedMatch { @@ -119,19 +137,44 @@ function isMatchArray(value: unknown): value is RerankedMatch[] { * the model. Returns null when the reference has no usable metadata — those * would just waste tokens. */ +function stringifyCandidateField(value: unknown): string { + if (typeof value === "string") return value.trim(); + if (value === null || value === undefined) return ""; + try { + return JSON.stringify(value); + } catch { + return ""; + } +} + +function componentIoToCandidateIo( + ioSpec: InputSpec | OutputSpec, +): RerankCandidateIO { + const type = stringifyCandidateField(ioSpec.type); + const description = ioSpec.description?.trim() ?? ""; + return { + name: ioSpec.name, + ...(type ? { type } : {}), + ...(description ? { description } : {}), + }; +} + export function componentReferenceToCandidate( reference: ComponentReference, + source?: ComponentSearchSource, ): RerankCandidate | null { const metadata = extractComponentMetadata(reference); if (!metadata) return null; + const inputs = reference.spec?.inputs?.map(componentIoToCandidateIo) ?? []; + const outputs = reference.spec?.outputs?.map(componentIoToCandidateIo) ?? []; + return { id: metadata.digest, name: metadata.name, description: metadata.description, - ...(metadata.inputNames.length > 0 ? { inputs: metadata.inputNames } : {}), - ...(metadata.outputNames.length > 0 - ? { outputs: metadata.outputNames } - : {}), + ...(source ? { source: { kind: source.kind, label: source.label } } : {}), + ...(inputs.length > 0 ? { inputs } : {}), + ...(outputs.length > 0 ? { outputs } : {}), }; }