diff --git a/src/routes/Dashboard/DashboardComponentsV2View.test.tsx b/src/routes/Dashboard/DashboardComponentsV2View.test.tsx index d32aebf95..cb563d991 100644 --- a/src/routes/Dashboard/DashboardComponentsV2View.test.tsx +++ b/src/routes/Dashboard/DashboardComponentsV2View.test.tsx @@ -228,6 +228,7 @@ function createIndexEntry( description: "", io: "", implementation: "", + metadata: "", }, }; } diff --git a/src/routes/Dashboard/DashboardComponentsV2View.tsx b/src/routes/Dashboard/DashboardComponentsV2View.tsx index 403af2d09..73758ea84 100644 --- a/src/routes/Dashboard/DashboardComponentsV2View.tsx +++ b/src/routes/Dashboard/DashboardComponentsV2View.tsx @@ -113,6 +113,7 @@ const MATCH_FIELD_LABEL: Record = { description: "description", io: "inputs/outputs", implementation: "command", + metadata: "metadata", }; // Built-in sources are constants — only registered libraries vary per row. diff --git a/src/services/componentSearchIndex.test.ts b/src/services/componentSearchIndex.test.ts index 9cf55039d..72a6974f8 100644 --- a/src/services/componentSearchIndex.test.ts +++ b/src/services/componentSearchIndex.test.ts @@ -23,6 +23,7 @@ function makeRef( digest: partial.digest, url: partial.url, name: partial.name, + published_by: partial.published_by, spec: partial.spec, }; } @@ -84,15 +85,29 @@ describe("buildSearchIndex", () => { expect(index[0].source).toEqual(USER); }); - it("indexes name, description, io, and container command text", () => { + it("indexes name, description, io details, metadata, and container command text", () => { const index = buildSearchIndex([ makeSourced({ digest: "a", + published_by: "publisher@example.com", spec: { name: "train_model", description: "Train a regression model on a dataset.", - inputs: [{ name: "dataset" }], - outputs: [{ name: "model" }], + inputs: [ + { + name: "dataset", + type: "Dataset", + description: "Training table with labeled rows.", + annotations: { format: "parquet" }, + }, + ], + outputs: [ + { + name: "model", + 
type: { artifact: "Model" }, + description: "Serialized classifier artifact.", + }, + ], implementation: { container: { image: "python:3.11", @@ -100,6 +115,12 @@ describe("buildSearchIndex", () => { args: ["--epochs", "10"], }, }, + metadata: { + annotations: { + framework: "sklearn", + python_original_code: "do not index this large source blob", + }, + }, }, }), ]); @@ -107,7 +128,14 @@ describe("buildSearchIndex", () => { expect(index[0].searchable.name).toContain("train_model"); expect(index[0].searchable.description).toContain("regression"); expect(index[0].searchable.io).toContain("dataset"); + expect(index[0].searchable.io).toContain("training table"); + expect(index[0].searchable.io).toContain("parquet"); expect(index[0].searchable.io).toContain("model"); + expect(index[0].searchable.io).toContain("serialized classifier"); + expect(index[0].searchable.io).toContain("artifact"); + expect(index[0].searchable.metadata).toContain("sklearn"); + expect(index[0].searchable.metadata).toContain("publisher@example.com"); + expect(index[0].searchable.metadata).not.toContain("source blob"); expect(index[0].searchable.implementation).toContain("pandas.train"); expect(index[0].searchable.implementation).toContain("--epochs"); }); @@ -259,6 +287,62 @@ describe("lexicalSearch", () => { expect(results[0]?.digest).toBe("x"); }); + it("matches input/output descriptions and types", () => { + const index = buildSearchIndex([ + makeSourced({ + digest: "typed-io", + spec: { + name: "generic_processor", + inputs: [ + { + name: "data", + type: "Dataset", + description: "Tabular dataframe rows to clean.", + }, + ], + outputs: [ + { + name: "result", + type: { artifact: "Model" }, + description: "Trained classifier artifact.", + }, + ], + implementation: { container: { image: "x" } }, + }, + }), + ]); + + expect(lexicalSearch(index, "dataframe")[0]?.digest).toBe("typed-io"); + const typeResults = lexicalSearch(index, "artifact"); + expect(typeResults[0]?.digest).toBe("typed-io"); + 
expect(typeResults[0]?.matchedFields).toContain("io"); + }); + + it("matches component metadata and source information", () => { + const index = buildSearchIndex([ + makeSourced( + { + digest: "meta", + published_by: "publisher@example.com", + spec: { + name: "generic_processor", + inputs: [], + outputs: [], + implementation: { container: { image: "x" } }, + metadata: { annotations: { framework: "lightgbm" } }, + }, + }, + USER, + ), + ]); + + expect(lexicalSearch(index, "lightgbm")[0]?.matchedFields).toContain( + "metadata", + ); + expect(lexicalSearch(index, "publisher@example")[0]?.digest).toBe("meta"); + expect(lexicalSearch(index, "user")[0]?.digest).toBe("meta"); + }); + it("matches implementation/command text with the lowest weight", () => { const index = buildSearchIndex(fixtures); const results = lexicalSearch(index, "pandas"); diff --git a/src/services/componentSearchIndex.ts b/src/services/componentSearchIndex.ts index 024733620..ac7be68ed 100644 --- a/src/services/componentSearchIndex.ts +++ b/src/services/componentSearchIndex.ts @@ -16,7 +16,12 @@ import type { ComponentReference } from "@/utils/componentSpec"; import { getComponentName } from "@/utils/getComponentName"; /** Which field of a component matched the query. Surfaced in the UI. */ -export type MatchField = "name" | "description" | "io" | "implementation"; +export type MatchField = + | "name" + | "description" + | "io" + | "implementation" + | "metadata"; /** * Where a component came from. 
/**
 * Annotation keys that are skipped when building search text.
 * NOTE(review): the key names suggest editor layout state and embedded
 * Python source/dependency payloads — confirm against the annotation producers.
 */
const ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH: ReadonlySet<string> = new Set([
  "editor.position",
  "editor.collapsed",
  "editor.flow-direction",
  "flex-nodes",
  "python_dependencies",
  "python_original_code",
  "tangleml.com/editor/task-color",
  "tangleml.com/editor/edge-conduits",
  "zIndex",
]);

/**
 * Annotation values whose trimmed text is longer than this many characters
 * are skipped entirely (they are not truncated).
 */
const MAX_ANNOTATION_TEXT_LENGTH = 500;

/**
 * Type guard: true only for strings that contain at least one
 * non-whitespace character.
 */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === "string" && value.trim().length > 0;
}

/**
 * Convert an arbitrary annotation/type value into text for the search index.
 *
 * - strings, numbers, booleans and bigints are rendered with `String()`;
 * - `undefined` and `null` become `""`;
 * - everything else is serialized with `JSON.stringify`.
 *
 * @param value - Any value found in a component spec.
 * @returns The textual form of `value`; always a string, never `undefined`.
 */
function stringifySearchValue(value: unknown): string {
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
    case "bigint":
      // bigint is handled here because JSON.stringify throws on it.
      return String(value);
    case "undefined":
      return "";
    default: {
      if (value === null) return "";
      try {
        // JSON.stringify returns `undefined` (it does not throw) for
        // functions, symbols, and objects whose toJSON() yields undefined.
        // Normalise that to "" so callers can safely call string methods.
        const serialized: unknown = JSON.stringify(value);
        return typeof serialized === "string" ? serialized : "";
      } catch {
        // Circular structures or a throwing toJSON(): search indexing is
        // best-effort, so treat the value as having no searchable text.
        return "";
      }
    }
  }
}

/**
 * Flatten an annotations map into a single space-separated string of
 * `key value` pairs.
 *
 * Entries are skipped when the key is listed in
 * `ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH`, when the value has no text, or when
 * the trimmed value text exceeds `MAX_ANNOTATION_TEXT_LENGTH`. A skipped
 * entry contributes neither its key nor its value.
 *
 * @param annotations - Annotation map, or `undefined` when the spec has none.
 * @returns The joined text, or `""` when nothing is searchable. Case is
 *   preserved.
 */
function extractAnnotationsText(
  annotations: Record<string, unknown> | undefined,
): string {
  if (!annotations) return "";

  const parts: string[] = [];
  for (const [key, value] of Object.entries(annotations)) {
    if (ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH.has(key)) continue;

    const valueText = stringifySearchValue(value).trim();
    if (!valueText || valueText.length > MAX_ANNOTATION_TEXT_LENGTH) continue;
    parts.push(key, valueText);
  }

  return parts.join(" ");
}
*/ + metadataText: string; } export function extractComponentMetadata( @@ -132,18 +194,24 @@ export function extractComponentMetadata( const spec = reference.spec; const description = spec?.description?.trim() ?? ""; const inputNames = - spec?.inputs - ?.map((i) => i.name) - .filter((n): n is string => typeof n === "string" && n.length > 0) ?? []; + spec?.inputs?.map((input) => input.name).filter(isNonEmptyString) ?? []; const outputNames = - spec?.outputs - ?.map((o) => o.name) - .filter((n): n is string => typeof n === "string" && n.length > 0) ?? []; + spec?.outputs?.map((output) => output.name).filter(isNonEmptyString) ?? []; + const ioText = [...(spec?.inputs ?? []), ...(spec?.outputs ?? [])] + .flatMap((ioSpec) => [ + ioSpec.name, + ioSpec.description, + stringifySearchValue(ioSpec.type), + extractAnnotationsText(ioSpec.annotations), + ]) + .filter(isNonEmptyString) + .join(" "); + const metadataText = extractAnnotationsText(spec?.metadata?.annotations); const hasUsefulMetadata = Boolean(spec?.name) || description.length > 0 || - inputNames.length > 0 || - outputNames.length > 0; + ioText.length > 0 || + metadataText.length > 0; if (!hasUsefulMetadata) return null; return { digest: reference.digest, @@ -151,6 +219,8 @@ export function extractComponentMetadata( description, inputNames, outputNames, + ioText, + metadataText, }; } @@ -175,10 +245,12 @@ export function buildSearchIndex(sourced: SourcedReference[]): IndexEntry[] { searchable: { name: metadata.name.toLowerCase(), description: metadata.description.toLowerCase(), - io: [...metadata.inputNames, ...metadata.outputNames] + io: metadata.ioText.toLowerCase(), + implementation: extractImplementationText(reference), + metadata: [metadata.metadataText, source.label, reference.published_by] + .filter(isNonEmptyString) .join(" ") .toLowerCase(), - implementation: extractImplementationText(reference), }, }); } @@ -244,8 +316,17 @@ const FIELD_WEIGHTS: Record = { description: 2, io: 2, implementation: 1, + 
metadata: 1, }; +const SEARCH_FIELDS: MatchField[] = [ + "name", + "description", + "io", + "implementation", + "metadata", +]; + interface SearchOptions { /** Max results to return. Default 20. */ limit?: number; @@ -271,12 +352,11 @@ function scoreEntry( entry: IndexEntry, tokens: string[], ): { score: number; matchedFields: MatchField[] } { - const fields: MatchField[] = ["name", "description", "io", "implementation"]; const matched = new Set(); let score = 0; for (const token of tokens) { - for (const field of fields) { + for (const field of SEARCH_FIELDS) { if (entry.searchable[field].includes(token)) { score += FIELD_WEIGHTS[field]; matched.add(field);