-
Notifications
You must be signed in to change notification settings - Fork 6
Add search quality expectation tests for lexical search #2431
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 06-18-improve_ai_rerank_payload_for_component_search
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -593,6 +593,133 @@ describe("lexicalSearch", () => { | |
| expect(results.map((result) => result.digest)).toEqual(["target"]); | ||
| }); | ||
|
|
||
| describe("search quality expectations", () => { | ||
| const qualityIndex = buildSearchIndex([ | ||
| makeSourced({ | ||
| digest: "train-test-split", | ||
| spec: { | ||
| name: "train_test_split", | ||
| description: "Split a dataset into train and test partitions.", | ||
| inputs: [{ name: "dataset", type: "Dataset" }], | ||
| outputs: [{ name: "train" }, { name: "test" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "train-model", | ||
| spec: { | ||
| name: "train_model", | ||
| description: "Fit a classifier on tabular data.", | ||
| inputs: [{ name: "table", type: "Dataset" }], | ||
| outputs: [{ name: "model", type: { artifact: "Model" } }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "filter-rows", | ||
| spec: { | ||
| name: "filter_rows", | ||
| description: "Filter dataset rows with a boolean condition.", | ||
| inputs: [{ name: "dataset" }], | ||
| outputs: [{ name: "filtered_dataset" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "load-csv", | ||
| spec: { | ||
| name: "load_csv_file", | ||
| description: "Read a CSV file into a tabular dataframe.", | ||
| inputs: [{ name: "path", type: "String" }], | ||
| outputs: [{ name: "table", type: "Dataset" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "local-upload", | ||
| spec: { | ||
| name: "upload_file", | ||
| description: "Upload a file to a local directory.", | ||
| inputs: [{ name: "file" }], | ||
| outputs: [{ name: "path" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "gcs-upload", | ||
| spec: { | ||
| name: "upload_to_gcs", | ||
| description: "Upload a file to GCS cloud storage.", | ||
| inputs: [{ name: "file" }], | ||
| outputs: [{ name: "gcs_uri" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "predict-labels", | ||
| spec: { | ||
| name: "predict_labels", | ||
| description: "Infer labels from examples using a trained model.", | ||
| inputs: [{ name: "model" }, { name: "examples" }], | ||
| outputs: [{ name: "predictions" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| makeSourced({ | ||
| digest: "text-embeddings", | ||
| spec: { | ||
| name: "create_text_embeddings", | ||
| description: "Create vector embeddings for text documents.", | ||
| inputs: [{ name: "documents" }], | ||
| outputs: [{ name: "embeddings", type: "EmbeddingVector" }], | ||
| implementation: { container: { image: "x" } }, | ||
| }, | ||
| }), | ||
| ]); | ||
|
|
||
| it.each([ | ||
| { | ||
| query: "split dataset into train and test", | ||
| expectedDigests: ["train-test-split"], | ||
| }, | ||
| { | ||
| query: "fit model on tabular data", | ||
| expectedDigests: ["train-model"], | ||
| }, | ||
| { | ||
| query: "read csv file", | ||
| expectedDigests: ["load-csv"], | ||
| }, | ||
| { | ||
| query: "filtr dataset rows", | ||
| expectedDigests: ["filter-rows"], | ||
| }, | ||
| { | ||
| query: "infer labels from model", | ||
| expectedDigests: ["predict-labels"], | ||
| }, | ||
| { | ||
| query: "make vector embeddings for text", | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
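No query in this suite requires synonym expansion: each shares a literal token/stem with its target (this one matches on the literal tokens vector/embeddings/text), so the synonym feature is never isolated and the suite would not catch it regressing. Add 1-2 synonym-only cases, e.g. a query whose words appear nowhere in the target's name, description, inputs, or outputs, so that it can match only through synonym expansion.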
||
| expectedDigests: ["text-embeddings"], | ||
| }, | ||
| { | ||
| query: "upload a file but not to GCS", | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This case passes even if negative-constraint parsing is removed entirely: the plain query "upload a file" already ranks local-upload first.
||
| expectedDigests: ["local-upload"], | ||
| }, | ||
| ])( | ||
| "returns expected results for '$query'", | ||
| ({ query, expectedDigests }) => { | ||
| const results = lexicalSearch(qualityIndex, query).map( | ||
| (result) => result.digest, | ||
| ); | ||
|
|
||
| expect(results.slice(0, expectedDigests.length)).toEqual( | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
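These assertions pin only rank #1; they check neither the ordering of close competitors nor that irrelevant components stay out of the visible top-5. Optional: add a couple of 2-element expectedDigests cases so that the relative order of close competitors is asserted as well.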
||
| expectedDigests, | ||
| ); | ||
| }, | ||
| ); | ||
| }); | ||
|
|
||
| it("does not special-case single-letter non-stop-word tokens", () => { | ||
| const index = buildSearchIndex([ | ||
| makeSourced({ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No ambiguous-multi-match or empty/nonsense-result case in the suite. Optional: add a query that should return multiple relevant components and assert both are present in the top-N.