From c379d9bee657c8495e7ec5a4285e6ae5f716620c Mon Sep 17 00:00:00 2001
From: mbeaulne <matt.beaulne@gmail.com>
Date: Thu, 18 Jun 2026 13:58:41 -0400
Subject: [PATCH] Improve component search relevance and AI reranking

---
 src/services/componentSearchIndex.test.ts | 127 ++++++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/src/services/componentSearchIndex.test.ts b/src/services/componentSearchIndex.test.ts
index 219dbe169..5cf7ce12b 100644
--- a/src/services/componentSearchIndex.test.ts
+++ b/src/services/componentSearchIndex.test.ts
@@ -593,6 +593,133 @@ describe("lexicalSearch", () => {
     expect(results.map((result) => result.digest)).toEqual(["target"]);
   });
 
+  describe("search quality expectations", () => { // pins which digest(s) lexicalSearch must return first for sample queries
+    const qualityIndex = buildSearchIndex([ // single index over the eight fixtures, shared by every case below
+      makeSourced({
+        digest: "train-test-split",
+        spec: {
+          name: "train_test_split",
+          description: "Split a dataset into train and test partitions.",
+          inputs: [{ name: "dataset", type: "Dataset" }],
+          outputs: [{ name: "train" }, { name: "test" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "train-model",
+        spec: {
+          name: "train_model",
+          description: "Fit a classifier on tabular data.",
+          inputs: [{ name: "table", type: "Dataset" }],
+          outputs: [{ name: "model", type: { artifact: "Model" } }], // only fixture with an object-valued type
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "filter-rows",
+        spec: {
+          name: "filter_rows",
+          description: "Filter dataset rows with a boolean condition.",
+          inputs: [{ name: "dataset" }],
+          outputs: [{ name: "filtered_dataset" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "load-csv",
+        spec: {
+          name: "load_csv_file",
+          description: "Read a CSV file into a tabular dataframe.",
+          inputs: [{ name: "path", type: "String" }],
+          outputs: [{ name: "table", type: "Dataset" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "local-upload", // shares "upload"/"file" wording with gcs-upload below
+        spec: {
+          name: "upload_file",
+          description: "Upload a file to a local directory.",
+          inputs: [{ name: "file" }],
+          outputs: [{ name: "path" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "gcs-upload",
+        spec: {
+          name: "upload_to_gcs",
+          description: "Upload a file to GCS cloud storage.",
+          inputs: [{ name: "file" }],
+          outputs: [{ name: "gcs_uri" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "predict-labels",
+        spec: {
+          name: "predict_labels",
+          description: "Infer labels from examples using a trained model.",
+          inputs: [{ name: "model" }, { name: "examples" }],
+          outputs: [{ name: "predictions" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+      makeSourced({
+        digest: "text-embeddings",
+        spec: {
+          name: "create_text_embeddings",
+          description: "Create vector embeddings for text documents.",
+          inputs: [{ name: "documents" }],
+          outputs: [{ name: "embeddings", type: "EmbeddingVector" }],
+          implementation: { container: { image: "x" } },
+        },
+      }),
+    ]);
+
+    it.each([ // one test per row: a query plus the digests expected at the front of the results
+      {
+        query: "split dataset into train and test",
+        expectedDigests: ["train-test-split"],
+      },
+      {
+        query: "fit model on tabular data",
+        expectedDigests: ["train-model"],
+      },
+      {
+        query: "read csv file",
+        expectedDigests: ["load-csv"],
+      },
+      {
+        query: "filtr dataset rows", // NOTE(review): "filtr" looks like a deliberate misspelling of "filter" - confirm before "fixing"
+        expectedDigests: ["filter-rows"],
+      },
+      {
+        query: "infer labels from model",
+        expectedDigests: ["predict-labels"],
+      },
+      {
+        query: "make vector embeddings for text",
+        expectedDigests: ["text-embeddings"],
+      },
+      {
+        query: "upload a file but not to GCS", // query names GCS, yet local-upload (not gcs-upload) is expected first
+        expectedDigests: ["local-upload"],
+      },
+    ])(
+      "returns expected results for '$query'",
+      ({ query, expectedDigests }) => {
+        const results = lexicalSearch(qualityIndex, query).map( // keep only digests, in the order returned
+          (result) => result.digest,
+        );
+
+        expect(results.slice(0, expectedDigests.length)).toEqual(
+          expectedDigests,
+        ); // only the first expectedDigests.length results are compared; anything after is unconstrained
+      },
+    );
+  });
+
   it("does not special-case single-letter non-stop-word tokens", () => {
     const index = buildSearchIndex([
       makeSourced({