Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Release Notes
=============

Version 0.63.3
--------------

- Facet counts and aggregations for Vector search (#3188)

Version 0.63.2 (Released April 14, 2026)
----------------------------------------

Expand Down
105 changes: 105 additions & 0 deletions frontends/api/src/generated/v0/api.ts

Large diffs are not rendered by default.

44 changes: 0 additions & 44 deletions frontends/main/src/app-pages/SearchPage/SearchPage.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -152,50 +152,6 @@ describe("SearchPage", () => {
},
)

test("Vector Hybrid Search passes correct params and hides count", async () => {
  /**
   * Returns the recorded request whose URL targets the vector resources
   * search endpoint, or `undefined` when no such request has been made yet.
   */
  const findVectorSearchCall = () =>
    makeRequest.mock.calls.find(([_method, requestUrl]) =>
      requestUrl.includes(urls.search.vectorResources()),
    )

  // Search response fixture: five generated resources, but a reported total
  // of 700 so the "700 results" text would be unmistakable if rendered.
  const resources = factories.learningResources.resources({ count: 5 })
  setMockApiResponses({
    search: {
      count: 700,
      metadata: {
        aggregations: {
          resource_type_group: [{ key: "course", doc_count: 100 }],
        },
        suggestions: [],
      },
      results: resources.results,
    },
  })

  // Mock the current user as an authenticated learning path editor (admin).
  setMockResponse.get(urls.userMe.get(), {
    is_learning_path_editor: true,
    is_authenticated: true,
  })

  renderWithProviders(<SearchPage />, { url: "?vector_search=true&q=test" })

  // Wait until the page has actually issued the vector search request.
  await waitFor(() => {
    expect(findVectorSearchCall()).toBeDefined()
  })

  const vectorCall = findVectorSearchCall()
  invariant(vectorCall)

  // The recorded URL may be relative, so resolve it against a dummy origin
  // purely to get access to its query string.
  const apiSearchParams = new URL(vectorCall[1], "http://mit.edu").searchParams

  expect(apiSearchParams.get("hybrid_search")).toBe("true")
  expect(apiSearchParams.get("q")).toBe("test")

  // The result count must not be rendered for vector search.
  expect(screen.queryByText("700 results")).toBeNull()
})

test("Toggling facets", async () => {
setMockApiResponses({
search: {
Expand Down
18 changes: 10 additions & 8 deletions frontends/main/src/page-components/SearchDisplay/SearchDisplay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,8 @@ const searchModeDropdownOptions = Object.entries(

/**
* Extracts only the fields supported by the vector search API from a broader
* search params object, dropping admin-only params (e.g., aggregations,
* content_file_score_weight) that the vector endpoint does not accept.
* search params object, dropping admin-only params (e.g., content_file_score_weight)
* that the vector endpoint does not accept.
*
* The `as` casts for enum arrays are safe because the v0 and v1 generated
* clients define separate (but structurally identical) enum types for the same
Expand All @@ -526,6 +526,7 @@ const searchModeDropdownOptions = Object.entries(
const toVectorSearchParams = (
params: ReturnType<typeof getSearchParams>,
): VectorSearchRequest => ({
aggregations: params.aggregations as VectorSearchRequest["aggregations"],
certification: params.certification,
certification_type:
params.certification_type as VectorSearchRequest["certification_type"],
Expand Down Expand Up @@ -625,10 +626,13 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
const wantsVectorSearch = searchParams.get("vector_search") === "true"
const isVectorSearch = wantsVectorSearch && user?.is_learning_path_editor

const queryOptions = isVectorSearch
? learningResourceQueries.vectorSearch(toVectorSearchParams(allParams))
: learningResourceQueries.search(allParams as LRSearchRequest)

// @ts-expect-error Typescript has trouble unifying the different query key types
const { data, isLoading, isFetching } = useQuery({
...(isVectorSearch
? learningResourceQueries.vectorSearch(toVectorSearchParams(allParams))
: learningResourceQueries.search(allParams as LRSearchRequest)),
...queryOptions,
enabled: !wantsVectorSearch || !isUserLoading,
placeholderData: keepPreviousData,
select: (timedData: {
Expand Down Expand Up @@ -985,9 +989,7 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
* the count when data is loaded even if count is same as previous
* count.
*/}
{isFetching || isLoading || isVectorSearch
? ""
: `${data?.count} results`}
{isFetching || isLoading ? "" : `${data?.count} results`}
</VisuallyHidden>
<UniversalAIBanner searchParams={searchParams} />
<Stack direction="row" justifyContent="space-between">
Expand Down
6 changes: 3 additions & 3 deletions main/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from main.settings_pluggy import * # noqa: F403
from openapi.settings_spectacular import open_spectacular_settings

VERSION = "0.63.2"
VERSION = "0.63.3"

log = logging.getLogger()

Expand Down Expand Up @@ -822,10 +822,10 @@ def get_all_config_keys():
QDRANT_CLIENT_TIMEOUT = get_int(name="QDRANT_CLIENT_TIMEOUT", default=10)

VECTOR_HYBRID_SEARCH_PREFETCH_MULTIPLIER = get_int(
name="VECTOR_HYBRID_SEARCH_PREFETCH_MULTIPLIER", default=20
name="VECTOR_HYBRID_SEARCH_PREFETCH_MULTIPLIER", default=5
)
VECTOR_HYBRID_SEARCH_PREFETCH_MAX_LIMIT = get_int(
name="VECTOR_HYBRID_SEARCH_PREFETCH_MAX_LIMIT", default=10000
name="VECTOR_HYBRID_SEARCH_PREFETCH_MAX_LIMIT", default=500
)
# toggle to use requests (default for local) or webdriver which renders js elements
EMBEDDINGS_EXTERNAL_FETCH_USE_WEBDRIVER = get_bool(
Expand Down
114 changes: 114 additions & 0 deletions openapi/specs/v0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,58 @@ paths:
description: Vector Search for content
summary: Content File Vector Search
parameters:
- in: query
name: aggregations
schema:
type: array
items:
enum:
- key
- course_number
- platform
- offered_by
- file_extension
- content_feature_type
- run_readable_id
- resource_readable_id
- run_title
- edx_module_id
- content_type
- description
- title
- url
- file_type
- summary
- flashcards
- checksum
type: string
description: |-
* `key` - Key
* `course_number` - Course Number
* `platform` - Platform
* `offered_by` - Offered By
* `file_extension` - File Extension
* `content_feature_type` - Content Feature Type
* `run_readable_id` - Run Readable Id
* `resource_readable_id` - Resource Readable Id
* `run_title` - Run Title
* `edx_module_id` - Edx Module Id
* `content_type` - Content Type
* `description` - Description
* `title` - Title
* `url` - Url
* `file_type` - File Type
* `summary` - Summary
* `flashcards` - Flashcards
* `checksum` - Checksum
description: "aggregations for facet counts \n\n* `key` - Key\n\
* `course_number` - Course Number\n* `platform` - Platform\n* `offered_by`\
\ - Offered By\n* `file_extension` - File Extension\n* `content_feature_type`\
\ - Content Feature Type\n* `run_readable_id` - Run Readable Id\n* `resource_readable_id`\
\ - Resource Readable Id\n* `run_title` - Run Title\n* `edx_module_id` -\
\ Edx Module Id\n* `content_type` - Content Type\n* `description` - Description\n\
* `title` - Title\n* `url` - Url\n* `file_type` - File Type\n* `summary`\
\ - Summary\n* `flashcards` - Flashcards\n* `checksum` - Checksum"
- in: query
name: collection_name
schema:
Expand Down Expand Up @@ -961,6 +1013,61 @@ paths:
description: Vector Search for learning resources
summary: Vector Search
parameters:
- in: query
name: aggregations
schema:
type: array
items:
enum:
- readable_id
- resource_type
- certification
- certification_type
- professional
- free
- course_feature
- topic
- ocw_topic
- level
- department
- platform
- offered_by
- delivery
- title
- url
- resource_type_group
- resource_category
- published
type: string
description: |-
* `readable_id` - Readable Id
* `resource_type` - Resource Type
* `certification` - Certification
* `certification_type` - Certification Type
* `professional` - Professional
* `free` - Free
* `course_feature` - Course Feature
* `topic` - Topic
* `ocw_topic` - Ocw Topic
* `level` - Level
* `department` - Department
* `platform` - Platform
* `offered_by` - Offered By
* `delivery` - Delivery
* `title` - Title
* `url` - Url
* `resource_type_group` - Resource Type Group
* `resource_category` - Resource Category
* `published` - Published
description: "aggregations for facet counts \n\n* `readable_id`\
\ - Readable Id\n* `resource_type` - Resource Type\n* `certification` -\
\ Certification\n* `certification_type` - Certification Type\n* `professional`\
\ - Professional\n* `free` - Free\n* `course_feature` - Course Feature\n\
* `topic` - Topic\n* `ocw_topic` - Ocw Topic\n* `level` - Level\n* `department`\
\ - Department\n* `platform` - Platform\n* `offered_by` - Offered By\n*\
\ `delivery` - Delivery\n* `title` - Title\n* `url` - Url\n* `resource_type_group`\
\ - Resource Type Group\n* `resource_category` - Resource Category\n* `published`\
\ - Published"
- in: query
name: certification
schema:
Expand Down Expand Up @@ -1255,6 +1362,13 @@ paths:
schema:
type: boolean
nullable: true
- in: query
name: published
schema:
type: boolean
default: true
description: If the resource is published. We default to True unless passed
in
- in: query
name: q
schema:
Expand Down
14 changes: 14 additions & 0 deletions vector_search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
"title": "title",
"url": "url",
"resource_type_group": "resource_type_group",
"resource_category": "resource_category",
"published": "published",
}


Expand All @@ -71,6 +73,7 @@
"url": models.PayloadSchemaType.KEYWORD,
"title": models.PayloadSchemaType.KEYWORD,
"resource_type_group": models.PayloadSchemaType.KEYWORD,
"resource_category": models.PayloadSchemaType.KEYWORD,
}

"""
Expand All @@ -92,3 +95,14 @@
QDRANT_TOPIC_INDEXES = {
"name": models.PayloadSchemaType.KEYWORD,
}


# Payload field names associated with content-file points.
# NOTE(review): presumably the subset of payload fields requested when
# retrieving content-file points from the vector store - confirm against callers.
CONTENT_FILES_RETRIEVE_PAYLOAD = ["key", "run_readable_id"]
# Payload field names associated with learning-resource points.
# NOTE(review): presumably used the same way as the content-file list - confirm.
RESOURCES_RETRIEVE_PAYLOAD = ["readable_id"]


# Lookup from each collection-name constant to the param map defined for that
# collection (resources, topics, content files).
COLLECTION_PARAM_MAP = {
    RESOURCES_COLLECTION_NAME: QDRANT_RESOURCE_PARAM_MAP,
    TOPICS_COLLECTION_NAME: QDRANT_TOPICS_PARAM_MAP,
    CONTENT_FILES_COLLECTION_NAME: QDRANT_CONTENT_FILE_PARAM_MAP,
}
45 changes: 38 additions & 7 deletions vector_search/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
SearchResponseMetadata,
SearchResponseSerializer,
)
from vector_search.constants import (
QDRANT_CONTENT_FILE_PARAM_MAP,
QDRANT_RESOURCE_PARAM_MAP,
)


class LearningResourcesVectorSearchRequestSerializer(serializers.Serializer):
Expand All @@ -35,6 +39,22 @@ class LearningResourcesVectorSearchRequestSerializer(serializers.Serializer):
limit = serializers.IntegerField(
required=False, help_text="Number of results to return per page"
)
aggregation_choices = [
(key, key.replace("_", " ").title()) for key in QDRANT_RESOURCE_PARAM_MAP
]
aggregations = serializers.ListField(
required=False,
child=serializers.ChoiceField(choices=aggregation_choices),
help_text=(
f"aggregations for facet counts \
\n\n{build_choice_description_list(aggregation_choices)}"
),
)
published = serializers.BooleanField(
required=False,
default=True,
help_text="If the resource is published. We default to True unless passed in",
)
readable_id = serializers.CharField(
required=False, help_text="The readable id of the resource"
)
Expand Down Expand Up @@ -177,11 +197,11 @@ def get_results(self, instance):
return instance.get("hits", {})

def get_count(self, instance) -> int:
return instance.get("total", {}).get("value")
return instance.get("total", {}).get("value", 0)

def get_metadata(self, _) -> SearchResponseMetadata:
def get_metadata(self, instance) -> SearchResponseMetadata:
return {
"aggregations": [],
"aggregations": instance.get("aggregations", {}),
"suggest": [],
}

Expand All @@ -198,6 +218,17 @@ class ContentFileVectorSearchRequestSerializer(serializers.Serializer):
limit = serializers.IntegerField(
required=False, help_text="Number of results to return per page"
)
aggregation_choices = [
(key, key.replace("_", " ").title()) for key in QDRANT_CONTENT_FILE_PARAM_MAP
]
aggregations = serializers.ListField(
required=False,
child=serializers.ChoiceField(choices=aggregation_choices),
help_text=(
f"aggregations for facet counts \
\n\n{build_choice_description_list(aggregation_choices)}"
),
)
sortby = serializers.ChoiceField(
required=False,
choices=CONTENT_FILE_SORTBY_OPTIONS,
Expand Down Expand Up @@ -275,14 +306,14 @@ class ContentFileVectorSearchResponseSerializer(SearchResponseSerializer):
"""

def get_count(self, instance) -> int:
return instance["total"]["value"]
return instance.get("total", {}).get("value", 0)

@extend_schema_field(ContentFileSerializer(many=True))
def get_results(self, instance):
return instance["hits"]
return instance.get("hits", {})

def get_metadata(self, *_) -> SearchResponseMetadata:
def get_metadata(self, instance) -> SearchResponseMetadata:
return {
"aggregations": [],
"aggregations": instance.get("aggregations", {}),
"suggest": [],
}
Loading
Loading