diff --git a/RELEASE.rst b/RELEASE.rst index dae25b1c31..8b7bc80d58 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,16 @@ Release Notes ============= +Version 0.66.0 +-------------- + +- filter server-side, remove video count (#3287) +- fix fallback when no language is selected (#3286) +- set s-maxage via NEXT_CACHE_S_MAXAGE_SECONDS (#3280) +- fix: assert run.title in DashboardCard heading test (#3282) +- fix: adjust duration position and remove institution label from the video series page and change the title of video and playlist on drawer (#3270) +- qdrant: Automatically compute optimizer settings (#3273) + Version 0.65.6 (Released May 04, 2026) -------------- diff --git a/frontends/api/src/hooks/learningResources/index.ts b/frontends/api/src/hooks/learningResources/index.ts index fe697779a8..989c023895 100644 --- a/frontends/api/src/hooks/learningResources/index.ts +++ b/frontends/api/src/hooks/learningResources/index.ts @@ -193,16 +193,6 @@ const useSimilarLearningResources = ( }) } -const useVectorSimilarLearningResources = ( - id: number, - opts?: { enabled?: boolean }, -) => { - return useQuery({ - ...learningResourceQueries.vectorSimilar(id), - ...opts, - }) -} - const useInfiniteLearningResourceItems = ( id: number, params: Omit, @@ -229,7 +219,6 @@ export { usePlatformsList, useSchoolsList, useSimilarLearningResources, - useVectorSimilarLearningResources, useInfiniteLearningResourceItems, learningResourceQueries, offerorQueries, diff --git a/frontends/api/src/hooks/learningResources/queries.ts b/frontends/api/src/hooks/learningResources/queries.ts index d6e38395d2..505da571ea 100644 --- a/frontends/api/src/hooks/learningResources/queries.ts +++ b/frontends/api/src/hooks/learningResources/queries.ts @@ -22,6 +22,7 @@ import type { LearningResourcesApiLearningResourcesSummaryListRequest as LearningResourcesSummaryListRequest, PaginatedLearningResourceRelationshipList, VideoPlaylistResource, + LearningResourcesApiLearningResourcesVectorSimilarListRequest, } from "../../generated/v1" import type { VectorLearningResourcesSearchApiVectorLearningResourcesSearchRetrieveRequest as VectorLearningResourcesSearchRetrieveRequest } from "../../generated/v0" import { infiniteQueryOptions, queryOptions } from "@tanstack/react-query" @@ -54,10 +55,9 @@ const learningResourceKeys = { detailsRoot: () => [...learningResourceKeys.root, "detail"], detail: (id: number) => [...learningResourceKeys.detailsRoot(), id], similar: (id: number) => [...learningResourceKeys.detail(id), "similar"], - vectorSimilar: (id: number) => [ - ...learningResourceKeys.detail(id), - "vector_similar", - ], + vectorSimilar: ( + params: LearningResourcesApiLearningResourcesVectorSimilarListRequest, + ) => [...learningResourceKeys.detail(params.id), "vector_similar", params], itemsRoot: (id: number) => [...learningResourceKeys.detail(id), "items"], items: (id: number, params: ItemsListRequest) => [ ...learningResourceKeys.itemsRoot(id), @@ -174,12 +174,14 @@ const learningResourceQueries = { .learningResourcesSimilarList({ id }) .then((res) => res.data), }), - vectorSimilar: (id: number) => + vectorSimilar: ( + params: LearningResourcesApiLearningResourcesVectorSimilarListRequest, + ) => queryOptions({ - queryKey: learningResourceKeys.vectorSimilar(id), + queryKey: learningResourceKeys.vectorSimilar(params), queryFn: () => learningResourcesApi - .learningResourcesVectorSimilarList({ id }) + .learningResourcesVectorSimilarList(params) .then((res) => res.data), }), list: (params: LearningResourcesListRequest) => diff --git a/frontends/main/next.config.js b/frontends/main/next.config.js index 1f0b85bb55..e3dc571210 100644 --- a/frontends/main/next.config.js +++ b/frontends/main/next.config.js @@ -8,6 +8,10 @@ const NEXT_PUBLIC_OPTIMIZE_IMAGES = Boolean( ) const IS_LOCAL_DEV = process.env.NODE_ENV === "development" +const NEXT_CACHE_S_MAXAGE_SECONDS = + process.env.NEXT_CACHE_S_MAXAGE_SECONDS || "1800" +const PAGE_CACHE_CONTROL = `s-maxage=${NEXT_CACHE_S_MAXAGE_SECONDS}, stale-if-error=86400, stale-while-revalidate=86400` + const processFeatureFlags = () => { const featureFlagPrefix = process.env.NEXT_PUBLIC_POSTHOG_FEATURE_PREFIX || "FEATURE_" @@ -69,8 +73,7 @@ const nextConfig = { headers: [ { key: "Cache-Control", - value: - "s-maxage=1800, stale-if-error=86400, stale-while-revalidate=86400", + value: PAGE_CACHE_CONTROL, }, ], }, @@ -86,8 +89,7 @@ const nextConfig = { headers: [ { key: "Cache-Control", - value: - "s-maxage=1800, stale-if-error=86400, stale-while-revalidate=86400", + value: PAGE_CACHE_CONTROL, }, ], }, diff --git a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.test.tsx b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.test.tsx index 33580a0ac3..cae4957486 100644 --- a/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.test.tsx +++ b/frontends/main/src/app-pages/DashboardPage/CoursewareDisplay/DashboardCard.test.tsx @@ -126,7 +126,7 @@ describe.each([ }) expect(courseLink).toHaveAttribute("href", coursewareUrl) expect( - within(card).getByRole("heading", { name: course.title, level: 3 }), + within(card).getByRole("heading", { name: courseRun.title, level: 3 }), ).toBeInTheDocument() }) diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/MetaRow.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/MetaRow.tsx deleted file mode 100644 index 46efa2dfb1..0000000000 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/MetaRow.tsx +++ /dev/null @@ -1,45 +0,0 @@ -import React from "react" -import { theme, useMediaQuery } from "ol-components" -import * as Styled from "./VideoSeriesDetailPage.styled" - -type MetaRowProps = { - metaParts: string[] - instructorNames: string | null - departmentName: string | null - duration: string | null - term: string | null -} - -const MetaRow: React.FC = ({ - metaParts, - instructorNames, - departmentName, - duration, - term, -}) => { - const isMobile = useMediaQuery(theme.breakpoints.down("sm")) - - if (isMobile) { - if (!instructorNames && !departmentName && !duration && !term) return null - return ( - - {instructorNames && ( - - {instructorNames} - - )} - {departmentName &&
{departmentName}
} - {(duration || term) && ( - - {[duration, term].filter(Boolean).join(" · ")} - - )} -
- ) - } - - if (metaParts.length === 0) return null - return {metaParts.join(" · ")} -} - -export default MetaRow diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/TopicChips.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/TopicChips.tsx deleted file mode 100644 index 93f3dcb890..0000000000 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/TopicChips.tsx +++ /dev/null @@ -1,32 +0,0 @@ -import React from "react" -import type { LearningResourceTopic } from "api/v1" -import * as Styled from "./VideoSeriesDetailPage.styled" - -type TopicChipsProps = { - topics: LearningResourceTopic[] -} - -const TopicChips: React.FC = ({ topics }) => { - if (topics.length === 0) return null - - return ( - <> - - - Video Series - - - {topics.map((topic) => ( - - {topic.name} - - ))} - - - ) -} - -export default TopicChips diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCollection.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCollection.tsx index b1a768ab98..db8c558a73 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCollection.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCollection.tsx @@ -1,5 +1,5 @@ import React from "react" -import { Typography, styled, theme } from "ol-components" +import { styled } from "ol-components" import VideoContainer from "./VideoContainer" import type { VideoResource } from "api/v1" import { VideoCard, VideoCardSkeleton } from "./VideoCard" @@ -18,22 +18,6 @@ const StyledContainer = styled(VideoContainer)(({ theme }) => ({ borderTop: `1px solid ${theme.custom.colors.lightGray2}`, })) -const CollectionHeader = styled.div(({ theme }) => ({ - display: "flex", - justifyContent: "space-between", - alignItems: "center", - margin: "32px 0 8px 0", - [theme.breakpoints.down("sm")]: { - margin: "24px 0 0 0", - }, -})) - -const CollectionTitle = styled(Typography)({ - ...theme.typography.body1, - fontWeight: theme.typography.fontWeightMedium, - color: theme.custom.colors.black, -}) - const VideoCardList = styled.div(({ theme }) => ({ display: "flex", flexDirection: "column", @@ -61,10 +45,6 @@ const VideoCollection: React.FC = ({ return ( - - {videos.length} Videos - - {isLoading ? Array.from({ length: 4 }).map((_, i) => ( diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoPlaylistCollectionPage.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoPlaylistCollectionPage.tsx index 7855166de1..7329da8744 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoPlaylistCollectionPage.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoPlaylistCollectionPage.tsx @@ -69,11 +69,11 @@ const VideoPlaylistCollectionPage: React.FC< ) const { data: similarData, isLoading: similarLoading } = useQuery({ - ...learningResourceQueries.vectorSimilar(playlistId), - select: (data) => - data.filter( - (resource) => resource.resource_type === ResourceTypeEnum.VideoPlaylist, - ), + ...learningResourceQueries.vectorSimilar({ + id: playlistId, + limit: 6, + resource_type: [ResourceTypeEnum.VideoPlaylist], + }), }) if (!showVideoPlaylistPage) { @@ -88,7 +88,6 @@ const VideoPlaylistCollectionPage: React.FC< (item): item is VideoResource => item.resource_type === VideoResourceResourceTypeEnum.Video, ) - const collectionVideos = videos.slice(1) const playlistType = isOcwPlaylist(playlist) const totalVideos = videos.length @@ -128,7 +127,7 @@ const VideoPlaylistCollectionPage: React.FC< ) : null} {!playlistType && ( diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.styled.ts b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.styled.ts index 7cd30b54d9..459551f4aa 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.styled.ts +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.styled.ts @@ -165,7 +165,7 @@ export const VideoTitle = styled.h1(({ theme }) => ({ ...theme.typography.h2, fontWeight: theme.typography.fontWeightBold, color: theme.custom.colors.black, - margin: "0 0 40px", + margin: "0 0 16px", "&:focus": { outline: "none" }, fontSize: "44px", fontStyle: "normal", @@ -173,7 +173,7 @@ export const VideoTitle = styled.h1(({ theme }) => ({ letterSpacing: "-0.88px", [theme.breakpoints.down("sm")]: { ...theme.typography.h3, - margin: "0 0 14px", + margin: "0 0 8px", letterSpacing: "inherit", }, })) @@ -297,8 +297,9 @@ export const MetaInstructorLine = styled.div(({ theme }) => ({ export const StyledDuration = styled.div(({ theme }) => ({ ...theme.typography.body2, color: theme.custom.colors.silverGrayDark, + margin: "0 0 40px", [theme.breakpoints.down("sm")]: { - marginTop: "4px", + margin: "0 0 16px", }, })) diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.test.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.test.tsx index 766bd1afaa..031c630b67 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.test.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.test.tsx @@ -134,34 +134,6 @@ describe("VideoSeriesDetailPage", () => { name: "Introduction to Machine Learning", }) }) - - test("renders the institution label from the video department", async () => { - const video = makeVideo({ - departments: [ - factories.learningResources.department({ - department_id: "eecs", - name: "Electrical Engineering and Computer Science", - }), - ], - }) - renderPage({ video }) - - await screen.findByText("ELECTRICAL ENGINEERING AND COMPUTER SCIENCE") - }) - - test("renders the institution label from offered_by when no department", async () => { - const playlist = makePlaylist({ - offered_by: { - code: "ocw", - name: "MIT OpenCourseWare", - channel_url: null, - }, - }) - const video = makeVideo({ departments: [] }) - renderPage({ video, playlistId: playlist.id, playlistData: playlist }) - - await screen.findByText("MIT OPENCOURSEWARE") - }) }) describe("breadcrumbs", () => { @@ -391,42 +363,6 @@ describe("VideoSeriesDetailPage", () => { }) }) - describe("topic chips", () => { - test("renders topic chip links for each topic", async () => { - const video = makeVideo({ - topics: [ - { id: 1, name: "Machine Learning", parent: 10, channel_url: null }, - { id: 2, name: "Statistics", parent: 11, channel_url: null }, - ], - }) - renderPage({ video }) - - const mlChip = await screen.findByRole("link", { - name: "Machine Learning", - }) - const statsChip = screen.getByRole("link", { name: "Statistics" }) - expect(mlChip).toHaveAttribute("href", "/search?topic=Machine%20Learning") - expect(statsChip).toHaveAttribute("href", "/search?topic=Statistics") - }) - - test("renders the Video Series heading when topics are present", async () => { - const video = makeVideo({ - topics: [{ id: 1, name: "Robotics", parent: 5, channel_url: null }], - }) - renderPage({ video }) - - await screen.findByText("Video Series") - }) - - test("does not render the Video Series section when there are no topics", async () => { - const video = makeVideo({ topics: [] }) - renderPage({ video }) - - await screen.findByRole("heading", { name: video.title }) - expect(screen.queryByText("Video Series")).not.toBeInTheDocument() - }) - }) - describe("video player", () => { test("renders the video player when a streaming URL is present", async () => { const video = makeVideo({ diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx index a38787342a..306d09224d 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx @@ -17,8 +17,6 @@ import { notFound } from "next/navigation" import { useSeriesNavigation } from "./useSeriesNavigation" import SeriesNavBar from "./SeriesNavBar" import UpNextSection from "./UpNextSection" -import MetaRow from "./MetaRow" -import TopicChips from "./TopicChips" import * as Styled from "./VideoSeriesDetailPage.styled" const VideoJsPlayer = dynamic( @@ -78,40 +76,14 @@ const VideoSeriesDetailPage: React.FC = ({ ? formatDurationClockTime(video.video.duration) : null - const topics = video?.topics ?? [] const playlistLabel = playlist?.title || "Video Collection" - // Meta: instructors, department, duration, term - const run = video?.runs?.[0] - const instructorNames = - run?.instructors - ?.map((i) => i.full_name) - .filter(Boolean) - .join(", ") ?? null - const departmentName = video?.departments?.[0]?.name ?? null - const term = - run?.semester && run?.year - ? `${run.semester} ${run.year}` - : run?.semester || (run?.year ? String(run.year) : null) - const metaParts = [instructorNames, departmentName, duration, term].filter( - Boolean, - ) as string[] - - const institutionLabel = - video?.departments?.[0]?.name?.toUpperCase() ?? - playlist?.offered_by?.name?.toUpperCase() ?? - null - const isLoading = videoLoading || (!!playlistId && playlistLoading) const videoTitleLabel = video?.title?.trim() || "Untitled video" const durationLabel = duration || "Unknown duration" - const topicNamesLabel = - topics - .map((t) => t.name) - .filter(Boolean) - .join(" · ") || "No topics listed" - const videoThumbnailAlt = `Video thumbnail for ${videoTitleLabel}. Duration: ${durationLabel}. Topics: ${topicNamesLabel}` + + const videoThumbnailAlt = `Video thumbnail for ${videoTitleLabel}. Duration: ${durationLabel}` const loadingStatusMessage = isLoading ? "Loading video details and player" : "Video details loaded" @@ -184,15 +156,6 @@ const VideoSeriesDetailPage: React.FC = ({ - {/* Institution / category label */} - {isLoading ? ( - - ) : institutionLabel ? ( - - {institutionLabel} - - ) : null} - {/* Video title */} {isLoading ? ( = ({ {video?.title} )} - + {duration && ( + {duration} + )} {/* Video player */} = ({ )} - {/* Meta row */} - {!isLoading && ( - - )} - {/* Description */} {!isLoading && video?.description && ( = ({ {!isLoading && !video?.description && ( - {videoTitleLabel}. Duration: {durationLabel}. Topics:{" "} - {topicNamesLabel}. + {videoTitleLabel}. Duration: {durationLabel}. )} - - {/* Topic chips */} - {!isLoading && } diff --git a/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.test.tsx b/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.test.tsx index 350af37be6..cceeea53e2 100644 --- a/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.test.tsx +++ b/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.test.tsx @@ -54,7 +54,7 @@ describe("CallToActionSection", () => { resourceType: ResourceTypeEnum.Video, platform: PlatformEnum.Youtube, resourceCategory: "Lecture Video", - expectedText: "Watch Video", + expectedText: "Learn More", }, { resourceType: ResourceTypeEnum.Video, @@ -66,7 +66,7 @@ describe("CallToActionSection", () => { resourceType: ResourceTypeEnum.VideoPlaylist, platform: PlatformEnum.Youtube, resourceCategory: "Video Playlist", - expectedText: "Watch Video", + expectedText: "Learn More", }, { resourceType: ResourceTypeEnum.Podcast, diff --git a/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx b/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx index e931fe7a11..1cdf7e6290 100644 --- a/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx +++ b/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx @@ -257,13 +257,12 @@ const getCallToActionText = (resource: LearningResource): string => { const listenToPodcast = "Listen to Podcast" const viewArticle = "View Article" const learnMore = "Learn More" - const watchVideos = "Watch Video" const callsToAction = { [ResourceTypeEnum.Course]: learnMore, [ResourceTypeEnum.Program]: learnMore, [ResourceTypeEnum.LearningPath]: learnMore, - [ResourceTypeEnum.Video]: watchVideos, - [ResourceTypeEnum.VideoPlaylist]: watchVideos, + [ResourceTypeEnum.Video]: learnMore, + [ResourceTypeEnum.VideoPlaylist]: learnMore, [ResourceTypeEnum.Podcast]: listenToPodcast, [ResourceTypeEnum.PodcastEpisode]: listenToPodcast, [ResourceTypeEnum.Document]: learnMore, diff --git a/frontends/main/src/page-components/ResourceCarousel/ResourceCarousel.tsx b/frontends/main/src/page-components/ResourceCarousel/ResourceCarousel.tsx index 22e7d2c075..3d08a8ee58 100644 --- a/frontends/main/src/page-components/ResourceCarousel/ResourceCarousel.tsx +++ b/frontends/main/src/page-components/ResourceCarousel/ResourceCarousel.tsx @@ -200,9 +200,9 @@ const getTabQuery = (tab: TabConfig): CarouselQuery => { tab.data.params.id, ) as CarouselQuery case "lr_vector_similar": - return learningResourceQueries.vectorSimilar( - tab.data.params.id, - ) as CarouselQuery + return learningResourceQueries.vectorSimilar({ + id: tab.data.params.id, + }) as CarouselQuery } } diff --git a/frontends/main/validateEnv.js b/frontends/main/validateEnv.js index e84d8fb45b..478e6da9ac 100644 --- a/frontends/main/validateEnv.js +++ b/frontends/main/validateEnv.js @@ -10,6 +10,9 @@ const yup = require("yup") const schema = yup.object().shape({ // Server-only env vars MITOL_NOINDEX: yup.string().oneOf(["true", "false"]), + NEXT_CACHE_S_MAXAGE_SECONDS: yup + .string() + .matches(/^\d+$/, { excludeEmptyString: true }), // Client or Server env vars NEXT_PUBLIC_APPZI_URL: yup.string(), NEXT_PUBLIC_ORIGIN: yup.string().required(), diff --git a/main/settings.py b/main/settings.py index 00ec603259..0236a4dd68 100644 --- a/main/settings.py +++ b/main/settings.py @@ -35,7 +35,7 @@ from main.settings_pluggy import * # noqa: F403 from openapi.settings_spectacular import open_spectacular_settings -VERSION = "0.65.6" +VERSION = "0.66.0" log = logging.getLogger() diff --git a/vector_search/conftest.py b/vector_search/conftest.py index ef62997ffd..be3651cf1f 100644 --- a/vector_search/conftest.py +++ b/vector_search/conftest.py @@ -40,6 +40,13 @@ def _use_test_qdrant_settings(settings, mocker): ) mock_qdrant = mocker.patch("qdrant_client.QdrantClient") mocker.patch("vector_search.utils.SemanticChunker") + mocker.patch( + "vector_search.utils.compute_optimizer_settings", + return_value={ + "indexing_threshold": 10, + "flush_interval_sec": 5, + }, + ) mock_qdrant.scroll.return_value = [ [], diff --git a/vector_search/constants.py b/vector_search/constants.py index bf2deb09de..e740589373 100644 --- a/vector_search/constants.py +++ b/vector_search/constants.py @@ -142,3 +142,24 @@ # Maximum value of offset + limit accepted by paginated vector search MAX_RESULT_WINDOW = 1000 + +# Qdrant Optimizer Settings +# Thresholds for points per shard +QDRANT_OPTIMIZER_THRESHOLD_SMALL = 50_000 +QDRANT_OPTIMIZER_THRESHOLD_MEDIUM = 500_000 +QDRANT_OPTIMIZER_THRESHOLD_LARGE = 2_000_000 + +# Target segment sizes +QDRANT_OPTIMIZER_SEGMENT_SMALL = 20_000 +QDRANT_OPTIMIZER_SEGMENT_MEDIUM = 60_000 +QDRANT_OPTIMIZER_SEGMENT_LARGE = 120_000 +QDRANT_OPTIMIZER_SEGMENT_XLARGE = 250_000 + +# Flush intervals +QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL = 15 +QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM = 20 +QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE = 25 +QDRANT_OPTIMIZER_FLUSH_INTERVAL_XLARGE = 30 + +# Indexing threshold ratio +QDRANT_OPTIMIZER_INDEXING_THRESHOLD_RATIO = 0.4 diff --git a/vector_search/utils.py b/vector_search/utils.py index 89791ab06e..c235595a04 100644 --- a/vector_search/utils.py +++ b/vector_search/utils.py @@ -38,6 +38,18 @@ QDRANT_CONTENT_FILE_INDEXES, QDRANT_CONTENT_FILE_PARAM_MAP, QDRANT_LEARNING_RESOURCE_INDEXES, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_XLARGE, + QDRANT_OPTIMIZER_INDEXING_THRESHOLD_RATIO, + QDRANT_OPTIMIZER_SEGMENT_LARGE, + QDRANT_OPTIMIZER_SEGMENT_MEDIUM, + QDRANT_OPTIMIZER_SEGMENT_SMALL, + QDRANT_OPTIMIZER_SEGMENT_XLARGE, + QDRANT_OPTIMIZER_THRESHOLD_LARGE, + QDRANT_OPTIMIZER_THRESHOLD_MEDIUM, + QDRANT_OPTIMIZER_THRESHOLD_SMALL, QDRANT_RESOURCE_PARAM_MAP, QDRANT_TOPIC_INDEXES, RESOURCES_COLLECTION_NAME, @@ -127,6 +139,48 @@ def points_generator( yield models.PointStruct(**point_data) +def compute_optimizer_settings(point_count: int, shard_number: int): + points_per_shard = max(point_count // shard_number, 1) + + # Determine target segment size + if points_per_shard < QDRANT_OPTIMIZER_THRESHOLD_SMALL: + target_segment = QDRANT_OPTIMIZER_SEGMENT_SMALL + flush_interval = QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL + elif points_per_shard < QDRANT_OPTIMIZER_THRESHOLD_MEDIUM: + target_segment = QDRANT_OPTIMIZER_SEGMENT_MEDIUM + flush_interval = QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM + elif points_per_shard < QDRANT_OPTIMIZER_THRESHOLD_LARGE: + target_segment = QDRANT_OPTIMIZER_SEGMENT_LARGE + flush_interval = QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE + else: + target_segment = QDRANT_OPTIMIZER_SEGMENT_XLARGE + flush_interval = QDRANT_OPTIMIZER_FLUSH_INTERVAL_XLARGE + + indexing_threshold = int(target_segment * QDRANT_OPTIMIZER_INDEXING_THRESHOLD_RATIO) + + return { + "indexing_threshold": indexing_threshold, + "flush_interval_sec": flush_interval, + } + + +def tune_collection(client, collection_name): + info = client.get_collection(collection_name) + point_count = info.points_count + shard_number = info.config.params.shard_number + desired = compute_optimizer_settings(point_count, shard_number) + current = info.config.optimizer_config + if ( + current.indexing_threshold == desired["indexing_threshold"] + and current.flush_interval_sec == desired["flush_interval_sec"] + ): + return + client.update_collection( + collection_name=collection_name, + optimizer_config=models.OptimizersConfigDiff(**desired), + ) + + def create_qdrant_collections(force_recreate): """ Create or recreate QDrant collections @@ -187,6 +241,7 @@ def create_qdrant_collection(collection_name, force_recreate): ), hnsw_config=models.HnswConfigDiff(on_disk=False), ) + tune_collection(client, collection_name) def update_qdrant_indexes(): diff --git a/vector_search/utils_test.py b/vector_search/utils_test.py index c9d2458a44..e2303e8851 100644 --- a/vector_search/utils_test.py +++ b/vector_search/utils_test.py @@ -33,6 +33,18 @@ QDRANT_CONTENT_FILE_INDEXES, QDRANT_CONTENT_FILE_PARAM_MAP, QDRANT_LEARNING_RESOURCE_INDEXES, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_XLARGE, + QDRANT_OPTIMIZER_INDEXING_THRESHOLD_RATIO, + QDRANT_OPTIMIZER_SEGMENT_LARGE, + QDRANT_OPTIMIZER_SEGMENT_MEDIUM, + QDRANT_OPTIMIZER_SEGMENT_SMALL, + QDRANT_OPTIMIZER_SEGMENT_XLARGE, + QDRANT_OPTIMIZER_THRESHOLD_LARGE, + QDRANT_OPTIMIZER_THRESHOLD_MEDIUM, + QDRANT_OPTIMIZER_THRESHOLD_SMALL, QDRANT_RESOURCE_PARAM_MAP, RESOURCES_COLLECTION_NAME, ) @@ -46,6 +58,7 @@ _is_markdown_content, _resource_vector_hits, async_qdrant_aggregations, + compute_optimizer_settings, create_qdrant_collections, embed_learning_resources, embed_topics, @@ -63,6 +76,65 @@ pytestmark = pytest.mark.django_db +@pytest.mark.parametrize( + ("point_count", "shard_number", "segment_size", "flush_interval"), + [ + ( + 0, + 10, + QDRANT_OPTIMIZER_SEGMENT_SMALL, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_SMALL - 1, + 1, + QDRANT_OPTIMIZER_SEGMENT_SMALL, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_SMALL, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_SMALL, + 1, + QDRANT_OPTIMIZER_SEGMENT_MEDIUM, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_MEDIUM, + 2, + QDRANT_OPTIMIZER_SEGMENT_MEDIUM, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_MEDIUM, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_MEDIUM, + 1, + QDRANT_OPTIMIZER_SEGMENT_LARGE, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_LARGE, + 4, + QDRANT_OPTIMIZER_SEGMENT_LARGE, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_LARGE, + ), + ( + QDRANT_OPTIMIZER_THRESHOLD_LARGE, + 1, + QDRANT_OPTIMIZER_SEGMENT_XLARGE, + QDRANT_OPTIMIZER_FLUSH_INTERVAL_XLARGE, + ), + ], +) +def test_compute_optimizer_settings( + point_count, shard_number, segment_size, flush_interval +): + """Optimizer settings are determined by point count per shard.""" + assert compute_optimizer_settings(point_count, shard_number) == { + "indexing_threshold": int( + segment_size * QDRANT_OPTIMIZER_INDEXING_THRESHOLD_RATIO + ), + "flush_interval_sec": flush_interval, + } + + @pytest.mark.parametrize("content_type", ["course", "content_file"]) def test_vector_point_id_used_for_embed(mocker, content_type): # test the vector ids we generate for embedding resources and files