Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/routes/Dashboard/DashboardComponentsV2View.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ function createIndexEntry(
description: "",
io: "",
implementation: "",
metadata: "",
},
};
}
Expand Down
1 change: 1 addition & 0 deletions src/routes/Dashboard/DashboardComponentsV2View.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ const MATCH_FIELD_LABEL: Record<MatchField, string> = {
description: "description",
io: "inputs/outputs",
implementation: "command",
metadata: "metadata",
};

// Built-in sources are constants — only registered libraries vary per row.
Expand Down
90 changes: 87 additions & 3 deletions src/services/componentSearchIndex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ function makeRef(
digest: partial.digest,
url: partial.url,
name: partial.name,
published_by: partial.published_by,
spec: partial.spec,
};
}
Expand Down Expand Up @@ -84,30 +85,57 @@ describe("buildSearchIndex", () => {
expect(index[0].source).toEqual(USER);
});

it("indexes name, description, io, and container command text", () => {
it("indexes name, description, io details, metadata, and container command text", () => {
const index = buildSearchIndex([
makeSourced({
digest: "a",
published_by: "publisher@example.com",
spec: {
name: "train_model",
description: "Train a regression model on a dataset.",
inputs: [{ name: "dataset" }],
outputs: [{ name: "model" }],
inputs: [
{
name: "dataset",
type: "Dataset",
description: "Training table with labeled rows.",
annotations: { format: "parquet" },
},
],
outputs: [
{
name: "model",
type: { artifact: "Model" },
description: "Serialized classifier artifact.",
},
],
implementation: {
container: {
image: "python:3.11",
command: ["python", "-m", "pandas.train"],
args: ["--epochs", "10"],
},
},
metadata: {
annotations: {
framework: "sklearn",
python_original_code: "do not index this large source blob",
},
},
},
}),
]);
expect(index).toHaveLength(1);
expect(index[0].searchable.name).toContain("train_model");
expect(index[0].searchable.description).toContain("regression");
expect(index[0].searchable.io).toContain("dataset");
expect(index[0].searchable.io).toContain("training table");
expect(index[0].searchable.io).toContain("parquet");
expect(index[0].searchable.io).toContain("model");
expect(index[0].searchable.io).toContain("serialized classifier");
expect(index[0].searchable.io).toContain("artifact");
expect(index[0].searchable.metadata).toContain("sklearn");
expect(index[0].searchable.metadata).toContain("publisher@example.com");
expect(index[0].searchable.metadata).not.toContain("source blob");
expect(index[0].searchable.implementation).toContain("pandas.train");
expect(index[0].searchable.implementation).toContain("--epochs");
});
Expand Down Expand Up @@ -259,6 +287,62 @@ describe("lexicalSearch", () => {
expect(results[0]?.digest).toBe("x");
});

// Verifies that text found only in input/output descriptions or types is
// searchable, and that such hits are attributed to the "io" field.
it("matches input/output descriptions and types", () => {
  const typedIoComponent = makeSourced({
    digest: "typed-io",
    spec: {
      name: "generic_processor",
      inputs: [
        {
          name: "data",
          type: "Dataset",
          description: "Tabular dataframe rows to clean.",
        },
      ],
      outputs: [
        {
          name: "result",
          type: { artifact: "Model" },
          description: "Trained classifier artifact.",
        },
      ],
      implementation: { container: { image: "x" } },
    },
  });
  const index = buildSearchIndex([typedIoComponent]);

  // "dataframe" appears only in the input description.
  const [descriptionHit] = lexicalSearch(index, "dataframe");
  expect(descriptionHit?.digest).toBe("typed-io");

  // "artifact" appears in the output description and the output type object.
  const [typeHit] = lexicalSearch(index, "artifact");
  expect(typeHit?.digest).toBe("typed-io");
  expect(typeHit?.matchedFields).toContain("io");
});

// Verifies that component-level annotations, the publisher, and the source
// are all reachable through lexical search.
it("matches component metadata and source information", () => {
  const annotatedComponent = makeSourced(
    {
      digest: "meta",
      published_by: "publisher@example.com",
      spec: {
        name: "generic_processor",
        inputs: [],
        outputs: [],
        implementation: { container: { image: "x" } },
        metadata: { annotations: { framework: "lightgbm" } },
      },
    },
    USER,
  );
  const index = buildSearchIndex([annotatedComponent]);

  // Annotation value -> reported under the "metadata" field.
  const [annotationHit] = lexicalSearch(index, "lightgbm");
  expect(annotationHit?.matchedFields).toContain("metadata");

  // Publisher address is matched by a partial query.
  const [publisherHit] = lexicalSearch(index, "publisher@example");
  expect(publisherHit?.digest).toBe("meta");

  // Query "user" is expected to hit this USER-sourced entry.
  const [sourceHit] = lexicalSearch(index, "user");
  expect(sourceHit?.digest).toBe("meta");
});

it("matches implementation/command text with the lowest weight", () => {
const index = buildSearchIndex(fixtures);
const results = lexicalSearch(index, "pandas");
Expand Down
106 changes: 93 additions & 13 deletions src/services/componentSearchIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@ import type { ComponentReference } from "@/utils/componentSpec";
import { getComponentName } from "@/utils/getComponentName";

/** Which field of a component matched the query. Surfaced in the UI. */
export type MatchField = "name" | "description" | "io" | "implementation";
export type MatchField =
| "name"
| "description"
| "io"
| "implementation"
| "metadata";

/**
* Where a component came from. Attached to every index entry and threaded
Expand Down Expand Up @@ -72,6 +77,59 @@ export function indexEntryToLexicalMatch(entry: IndexEntry): LexicalMatch {
};
}

/**
 * Annotation keys that `extractAnnotationsText` skips entirely: neither the
 * key nor its value is added to the searchable text.
 * NOTE(review): these look like editor/layout state and bulky code payloads;
 * confirm the rationale with the annotation producers.
 */
const ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH = new Set<string>([
  "editor.position",
  "editor.collapsed",
  "editor.flow-direction",
  "flex-nodes",
  "python_dependencies",
  "python_original_code",
  "tangleml.com/editor/task-color",
  "tangleml.com/editor/edge-conduits",
  "zIndex",
]);

/**
 * Upper bound (in characters) on a stringified annotation value. Values
 * longer than this are dropped from the searchable text, not truncated.
 */
const MAX_ANNOTATION_TEXT_LENGTH = 500;

/**
 * Type guard: true only for strings that contain at least one
 * non-whitespace character.
 */
function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== "string") return false;
  return value.trim() !== "";
}

/**
 * Render an arbitrary value (annotation value, IO type, ...) as plain text
 * for the search index. Always returns a string and never throws.
 *
 * - Strings, numbers, and booleans are rendered with `String()`.
 * - `null`, `undefined`, functions, symbols, and bigints yield `""`.
 * - Any other value is JSON-serialized; if serialization throws (e.g. a
 *   circular structure) or produces no output, the result is `""`.
 *
 * @param value - The value to convert.
 * @returns The text form of `value`, or `""` when it has no usable text.
 */
function stringifySearchValue(value: unknown): string {
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return String(value);
    case "undefined":
    // Functions and symbols make JSON.stringify return `undefined`, and
    // bigints make it throw; none of them has a JSON text form.
    case "function":
    case "symbol":
    case "bigint":
      return "";
    default: {
      if (value === null) return "";
      try {
        // Despite its declared return type, JSON.stringify yields `undefined`
        // at runtime when an object's toJSON() returns undefined.
        const json: string | undefined = JSON.stringify(value);
        return json ?? "";
      } catch {
        return "";
      }
    }
  }
}

/**
 * Flatten an annotations map into a single space-separated string of
 * alternating keys and stringified values.
 *
 * An entry is left out when its key is in
 * `ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH`, or when its trimmed text value is
 * empty or longer than `MAX_ANNOTATION_TEXT_LENGTH`.
 *
 * @param annotations - Annotation map, or `undefined` when absent.
 * @returns The joined text, or `""` when there is nothing to index.
 */
function extractAnnotationsText(
  annotations: Record<string, unknown> | undefined,
): string {
  if (!annotations) return "";

  return Object.entries(annotations)
    .flatMap(([key, value]): string[] => {
      if (ANNOTATION_KEYS_EXCLUDED_FROM_SEARCH.has(key)) return [];

      const text = stringifySearchValue(value).trim();
      const isIndexable =
        text.length > 0 && text.length <= MAX_ANNOTATION_TEXT_LENGTH;
      return isIndexable ? [key, text] : [];
    })
    .join(" ");
}

/**
* Flatten a container implementation's image + command + args into a single
* lowercase string. Placeholder objects (e.g. `{ inputValue: "Where" }`) are
Expand Down Expand Up @@ -123,6 +181,10 @@ export interface ComponentMetadata {
description: string;
inputNames: string[];
outputNames: string[];
/** Names, descriptions, types, and annotations for inputs/outputs. */
ioText: string;
/** Searchable component-level metadata annotations. */
metadataText: string;
}

export function extractComponentMetadata(
Expand All @@ -132,25 +194,33 @@ export function extractComponentMetadata(
const spec = reference.spec;
const description = spec?.description?.trim() ?? "";
const inputNames =
spec?.inputs
?.map((i) => i.name)
.filter((n): n is string => typeof n === "string" && n.length > 0) ?? [];
spec?.inputs?.map((input) => input.name).filter(isNonEmptyString) ?? [];
const outputNames =
spec?.outputs
?.map((o) => o.name)
.filter((n): n is string => typeof n === "string" && n.length > 0) ?? [];
spec?.outputs?.map((output) => output.name).filter(isNonEmptyString) ?? [];
const ioText = [...(spec?.inputs ?? []), ...(spec?.outputs ?? [])]
.flatMap((ioSpec) => [
ioSpec.name,
ioSpec.description,
stringifySearchValue(ioSpec.type),
extractAnnotationsText(ioSpec.annotations),
])
.filter(isNonEmptyString)
.join(" ");
const metadataText = extractAnnotationsText(spec?.metadata?.annotations);
const hasUsefulMetadata =
Boolean(spec?.name) ||
description.length > 0 ||
inputNames.length > 0 ||
outputNames.length > 0;
ioText.length > 0 ||
metadataText.length > 0;
if (!hasUsefulMetadata) return null;
return {
digest: reference.digest,
name: getComponentName(reference),
description,
inputNames,
outputNames,
ioText,
metadataText,
};
}

Expand All @@ -175,10 +245,12 @@ export function buildSearchIndex(sourced: SourcedReference[]): IndexEntry[] {
searchable: {
name: metadata.name.toLowerCase(),
description: metadata.description.toLowerCase(),
io: [...metadata.inputNames, ...metadata.outputNames]
io: metadata.ioText.toLowerCase(),
implementation: extractImplementationText(reference),
metadata: [metadata.metadataText, source.label, reference.published_by]
.filter(isNonEmptyString)
.join(" ")
.toLowerCase(),
implementation: extractImplementationText(reference),
},
});
}
Expand Down Expand Up @@ -244,8 +316,17 @@ const FIELD_WEIGHTS: Record<MatchField, number> = {
description: 2,
io: 2,
implementation: 1,
metadata: 1,
};

/**
 * Every searchable field of an index entry, one per `MatchField` member.
 * `scoreEntry` checks each query token against these fields in this order.
 */
const SEARCH_FIELDS: MatchField[] = [
"name",
"description",
"io",
"implementation",
"metadata",
];

interface SearchOptions {
/** Max results to return. Default 20. */
limit?: number;
Expand All @@ -271,12 +352,11 @@ function scoreEntry(
entry: IndexEntry,
tokens: string[],
): { score: number; matchedFields: MatchField[] } {
const fields: MatchField[] = ["name", "description", "io", "implementation"];
const matched = new Set<MatchField>();
let score = 0;

for (const token of tokens) {
for (const field of fields) {
for (const field of SEARCH_FIELDS) {
if (entry.searchable[field].includes(token)) {
score += FIELD_WEIGHTS[field];
matched.add(field);
Expand Down
Loading