diff --git a/RELEASE.rst b/RELEASE.rst
index e28606c012..429c991afc 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -1,6 +1,13 @@
Release Notes
=============
+Version 0.65.3
+--------------
+
+- show resource_category in channel pages (#3264)
+- use youtube etl for playlists and ocw for videos (#3257)
+- Vector search pagination fix for empty hybrid searches (#3251)
+
Version 0.65.2 (Released April 30, 2026)
--------------
diff --git a/frontends/main/src/app-pages/ChannelPage/ChannelSearch.test.tsx b/frontends/main/src/app-pages/ChannelPage/ChannelSearch.test.tsx
index 8f6b4317fd..e4e473cae1 100644
--- a/frontends/main/src/app-pages/ChannelPage/ChannelSearch.test.tsx
+++ b/frontends/main/src/app-pages/ChannelPage/ChannelSearch.test.tsx
@@ -310,4 +310,63 @@ describe("ChannelSearch", () => {
await user.click(screen.getByRole("button", { name: "Search" }))
expect(location.current.searchParams.get("q")).toBe("woof")
})
+
+ test.each([
+ { channelType: ChannelTypeEnum.Topic },
+ { channelType: ChannelTypeEnum.Department },
+ { channelType: ChannelTypeEnum.Unit },
+ ])(
+ "Shows Resource Category facet only when resource_type_group=learning_material ($channelType)",
+ async ({ channelType }) => {
+ const { channel } = setMockApiResponses({
+ channelPatch: { channel_type: channelType },
+ search: {
+ count: 700,
+ metadata: {
+ aggregations: {
+ resource_type_group: [
+ { key: "course", doc_count: 100 },
+ { key: "learning_material", doc_count: 200 },
+ ],
+ resource_category: [
+ { key: "Course", doc_count: 100 },
+ { key: "Video", doc_count: 100 },
+ ],
+ topic: [{ key: "physics", doc_count: 100 }],
+ department: [{ key: "1", doc_count: 100 }],
+ certification_type: [{ key: "micromasters", doc_count: 100 }],
+ delivery: [{ key: "online", doc_count: 100 }],
+ offered_by: [{ key: "ocw", doc_count: 100 }],
+ },
+ suggestions: [],
+ },
+ },
+ })
+
+ const {
+ view: { unmount },
+ } = renderWithProviders(, {
+ url: `/c/${channel.channel_type}/${channel.name}/`,
+ })
+
+ const facetsContainer = await screen.findByTestId("facets-container")
+ await within(facetsContainer).findByText("Certificate")
+ expect(
+ within(facetsContainer).queryByText("Resource Category"),
+ ).toBeNull()
+
+ unmount()
+
+ // Re-render with resource_type_group=learning_material
+ renderWithProviders(, {
+ url: `/c/${channel.channel_type}/${channel.name}/?resource_type_group=learning_material`,
+ })
+
+ expect(
+ await within(await screen.findByTestId("facets-container")).findByText(
+ "Resource Category",
+ ),
+ ).toBeInTheDocument()
+ },
+ )
})
diff --git a/frontends/main/src/app-pages/ChannelPage/searchRequests.ts b/frontends/main/src/app-pages/ChannelPage/searchRequests.ts
index 7deb66d0d4..66b4b45d59 100644
--- a/frontends/main/src/app-pages/ChannelPage/searchRequests.ts
+++ b/frontends/main/src/app-pages/ChannelPage/searchRequests.ts
@@ -30,6 +30,7 @@ export const getConstantSearchParams = (searchFilter?: string) => {
const FACETS_BY_CHANNEL_TYPE: Record = {
[ChannelTypeEnum.Topic]: [
"free",
+ "resource_category",
"resource_type",
"certification_type",
"delivery",
@@ -38,6 +39,7 @@ const FACETS_BY_CHANNEL_TYPE: Record = {
],
[ChannelTypeEnum.Department]: [
"free",
+ "resource_category",
"resource_type",
"certification_type",
"topic",
@@ -46,6 +48,7 @@ const FACETS_BY_CHANNEL_TYPE: Record = {
],
[ChannelTypeEnum.Unit]: [
"free",
+ "resource_category",
"resource_type",
"topic",
"certification_type",
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx
index fbfcf7259e..941e85b920 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx
@@ -184,7 +184,9 @@ const FeaturedVideo: React.FC = ({
totalVideos,
totalTime,
}) => {
- const imageUrl = video.image?.url ?? null
+ const imageUrl =
+ video.image?.url ?? video.content_files?.[0]?.image_src ?? null
+
const duration = video.video?.duration
? formatDurationClockTime(video.video.duration)
: null
@@ -231,7 +233,9 @@ const FeaturedVideo: React.FC = ({
{description && (
- {description}
+
)}
) : (
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/SeriesVideoList.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/SeriesVideoList.tsx
index 3c83f6f7ed..f13f8bea8a 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/SeriesVideoList.tsx
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/SeriesVideoList.tsx
@@ -227,9 +227,12 @@ export const EpisodeItem: React.FC = ({
{episode.title}
-
- {episode.description}
-
+
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx
index 386f04429b..8900d13d90 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx
@@ -142,8 +142,11 @@ type VideoCardProps = {
const VideoCard: React.FC = ({ resource, href }) => {
const [imgError, setImgError] = useState(false)
- const imageUrl =
- !imgError && resource.image?.url ? resource.image.url : PLACEHOLDER_IMG
+ const imageUrl = !imgError
+ ? (resource?.image?.url ??
+ resource.content_files?.[0]?.image_src ??
+ PLACEHOLDER_IMG)
+ : PLACEHOLDER_IMG
const description = resource.description ?? ""
const duration = resource.video?.duration
? formatDurationClockTime(resource.video.duration)
@@ -171,7 +174,7 @@ const VideoCard: React.FC = ({ resource, href }) => {
- {description}
+
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx
index e1c1918d8c..1eadd25e4b 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx
@@ -388,7 +388,13 @@ const VideoDetailPage: React.FC = ({
const sources = useMemo(
() =>
- video ? resolveVideoSources(video.video?.streaming_url, video.url) : [],
+ video
+ ? resolveVideoSources(
+ video.video?.streaming_url,
+ video.url,
+ video.content_files?.[0]?.youtube_id,
+ )
+ : [],
[video],
)
@@ -523,6 +529,7 @@ const VideoDetailPage: React.FC = ({
sources[0]?.type === "video/youtube"
? undefined
: (video?.video?.cover_image_url ??
+ video?.content_files?.[0]?.image_src ??
video?.image?.url ??
undefined)
}
@@ -532,10 +539,12 @@ const VideoDetailPage: React.FC = ({
ariaLabel={`Video: ${videoTitleLabel}`}
ariaDescribedBy="video-description"
/>
- ) : video?.image?.url ? (
+ ) : video?.image?.url || video?.content_files?.[0]?.image_src ? (
= ({
{!isLoading && video?.description && (
-
- {video?.description}
-
+
)}
{!isLoading && !video?.description && (
@@ -627,7 +637,10 @@ const VideoDetailPage: React.FC = ({
const itemDuration = item.video?.duration
? formatDurationClockTime(item.video.duration)
: null
- const imageUrl = item.image?.url ?? null
+ const imageUrl =
+ item.image?.url ??
+ item.content_files?.[0]?.image_src ??
+ null
const itemTopicNames = (item.topics ?? [])
.map((topic) => topic.name)
.filter(Boolean)
@@ -660,9 +673,11 @@ const VideoDetailPage: React.FC = ({
{item.title}
{item.description && (
-
- {item.description}
-
+
)}
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx
index a9b0bf201a..a38787342a 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoSeriesDetailPage.tsx
@@ -64,7 +64,13 @@ const VideoSeriesDetailPage: React.FC = ({
const sources = useMemo(
() =>
- video ? resolveVideoSources(video.video?.streaming_url, video.url) : [],
+ video
+ ? resolveVideoSources(
+ video.video?.streaming_url,
+ video.url,
+ video.content_files?.[0]?.youtube_id,
+ )
+ : [],
[video],
)
@@ -274,9 +280,10 @@ const VideoSeriesDetailPage: React.FC = ({
{/* Description */}
{!isLoading && video?.description && (
-
- {video.description}
-
+
)}
{!isLoading && !video?.description && (
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.test.ts b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.test.ts
index 53135fbfcc..4a4ffdbe8e 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.test.ts
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.test.ts
@@ -5,6 +5,7 @@ describe("resolveVideoSources", () => {
const result = resolveVideoSources(
"https://cdn.example.com/video/index.m3u8",
null,
+ null,
)
expect(result).toEqual([
{
@@ -18,6 +19,7 @@ describe("resolveVideoSources", () => {
const result = resolveVideoSources(
"https://cdn.example.com/video/manifest.mpd",
null,
+ null,
)
expect(result).toEqual([
{
@@ -31,6 +33,7 @@ describe("resolveVideoSources", () => {
const result = resolveVideoSources(
"https://cdn.example.com/video/clip.mp4",
null,
+ null,
)
expect(result).toEqual([
{ src: "https://cdn.example.com/video/clip.mp4", type: "video/mp4" },
@@ -41,6 +44,7 @@ describe("resolveVideoSources", () => {
const result = resolveVideoSources(
"https://cdn.example.com/video/stream",
"https://www.youtube.com/watch?v=abc",
+ null,
)
// streamingUrl takes precedence over pageUrl
expect(result).toEqual([
@@ -52,6 +56,7 @@ describe("resolveVideoSources", () => {
const result = resolveVideoSources(
null,
"https://www.youtube.com/watch?v=abc123",
+ null,
)
expect(result).toEqual([
{ src: "https://www.youtube.com/watch?v=abc123", type: "video/youtube" },
@@ -59,23 +64,47 @@ describe("resolveVideoSources", () => {
})
it("returns YouTube source when pageUrl is a youtu.be short link", () => {
- const result = resolveVideoSources(null, "https://youtu.be/abc123")
+ const result = resolveVideoSources(null, "https://youtu.be/abc123", null)
expect(result).toEqual([
{ src: "https://youtu.be/abc123", type: "video/youtube" },
])
})
- it("returns an empty array when both arguments are null", () => {
- expect(resolveVideoSources(null, null)).toEqual([])
+ it("returns YouTube source from youtubeId when no streaming URL", () => {
+ const result = resolveVideoSources(null, null, "dQw4w9WgXcQ")
+ expect(result).toEqual([
+ {
+ src: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+ type: "video/youtube",
+ },
+ ])
})
- it("returns an empty array when both arguments are undefined", () => {
- expect(resolveVideoSources(undefined, undefined)).toEqual([])
+ it("prefers youtubeId over a non-YouTube pageUrl", () => {
+ const result = resolveVideoSources(
+ null,
+ "https://ocw.mit.edu/some-video",
+ "dQw4w9WgXcQ",
+ )
+ expect(result).toEqual([
+ {
+ src: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+ type: "video/youtube",
+ },
+ ])
})
- it("returns an empty array when there is no streaming URL and pageUrl is not a YouTube link", () => {
- expect(resolveVideoSources(null, "https://ocw.mit.edu/some-video")).toEqual(
- [],
- )
+ it("returns an empty array when all arguments are null", () => {
+ expect(resolveVideoSources(null, null, null)).toEqual([])
+ })
+
+ it("returns an empty array when all arguments are undefined", () => {
+ expect(resolveVideoSources(undefined, undefined, undefined)).toEqual([])
+ })
+
+ it("returns an empty array when there is no streaming URL, no youtubeId, and pageUrl is not a YouTube link", () => {
+ expect(
+ resolveVideoSources(null, "https://ocw.mit.edu/some-video", null),
+ ).toEqual([])
})
})
diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.ts b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.ts
index 2ce85f6fbe..4bcd2da53f 100644
--- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.ts
+++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/videoSources.ts
@@ -10,6 +10,7 @@ import type { VideoJsSource } from "./VideoJsPlayer"
export function resolveVideoSources(
streamingUrl: string | null | undefined,
pageUrl: string | null | undefined,
+ youtubeId: string | null | undefined,
): VideoJsSource[] {
if (streamingUrl) {
// HLS
@@ -24,6 +25,15 @@ export function resolveVideoSources(
return [{ src: streamingUrl, type: "video/mp4" }]
}
+ if (youtubeId) {
+ return [
+ {
+ src: `https://www.youtube.com/watch?v=${youtubeId}`,
+ type: "video/youtube",
+ },
+ ]
+ }
+
if (pageUrl && /youtube\.com|youtu\.be/.test(pageUrl)) {
return [{ src: pageUrl, type: "video/youtube" }]
}
diff --git a/frontends/main/src/page-components/LearningResourceExpanded/InfoSection.tsx b/frontends/main/src/page-components/LearningResourceExpanded/InfoSection.tsx
index 86a44262fe..680ad1afe3 100644
--- a/frontends/main/src/page-components/LearningResourceExpanded/InfoSection.tsx
+++ b/frontends/main/src/page-components/LearningResourceExpanded/InfoSection.tsx
@@ -529,7 +529,13 @@ const INFO_ITEMS: InfoItemConfig = [
label: "Parent Course:",
Icon: RiBookLine,
selector: (resource: LearningResource) => {
- return formattedParentCourseName(resource)
+ const name = formattedParentCourseName(resource)
+ if (!name || !resource.url) return name
+ return (
+
+ {name}
+
+ )
},
},
{
diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py
index 1fe7a9d4b1..631a2a5a26 100644
--- a/learning_resources/etl/loaders.py
+++ b/learning_resources/etl/loaders.py
@@ -4,6 +4,7 @@
from collections.abc import Iterable
from typing import NamedTuple
+from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.cache import caches
from django.db import transaction
@@ -11,6 +12,7 @@
from learning_resources.constants import (
CONTENT_TYPE_PAGE,
+ OCW_CONTENT_CATEGORY_LECTURE_VIDEOS,
OCW_COURSE_CONTENT_CATEGORY_MAPPING,
VIDEO_CONTENT_CATEGORIES,
LearningResourceDelivery,
@@ -1030,11 +1032,20 @@ def load_learning_materials(
"""
material_ids = []
+ if settings.CREATE_OCW_LEARNING_MATERIALS:
+ promoted_ocw_file_types = set(OCW_COURSE_CONTENT_CATEGORY_MAPPING.keys())
+ elif settings.SHOW_OCW_LECTURE_VIDEOS:
+ promoted_ocw_file_types = {OCW_CONTENT_CATEGORY_LECTURE_VIDEOS}
+ else:
+ promoted_ocw_file_types = set()
+
for content_file_id in content_file_ids:
content_file = ContentFile.objects.get(id=content_file_id)
- learning_material_tags = set(
- content_file.content_tags.values_list("name", flat=True)
- ) & set(OCW_COURSE_CONTENT_CATEGORY_MAPPING.keys())
+
+ learning_material_tags = (
+ set(content_file.content_tags.values_list("name", flat=True))
+ & promoted_ocw_file_types
+ )
if content_file.content_type != CONTENT_TYPE_PAGE and learning_material_tags:
material_ids.append(
load_learning_material(course_run, content_file, learning_material_tags)
@@ -1330,6 +1341,7 @@ def load_video(video_data: dict) -> LearningResource:
video_fields = video_data.pop("video", {})
image_data = video_data.pop("image", None)
video_data["resource_category"] = LearningResourceType.video.value
+ video_data.pop("youtube_id", None)
with transaction.atomic():
(
@@ -1443,7 +1455,7 @@ def load_documents(
return document_resources
-def load_ovs_playlist(playlist_data: dict) -> LearningResource:
+def load_ovs_playlist(playlist_data: dict) -> LearningResource | None:
"""
Load a single OVS playlist (collection) and its videos into the database.
@@ -1451,7 +1463,8 @@ def load_ovs_playlist(playlist_data: dict) -> LearningResource:
playlist_data (dict): playlist data including nested videos list
Returns:
- LearningResource: the created or updated playlist resource
+ LearningResource | None: the created or updated playlist resource,
+ or None if not all videos could be matched
"""
channel, _ = VideoChannel.objects.get_or_create(
channel_id="ovs",
@@ -1519,7 +1532,9 @@ def load_ovs_playlists(playlists_data: iter) -> list[LearningResource]:
return playlists
-def load_playlist(video_channel: VideoChannel, playlist_data: dict) -> LearningResource:
+def load_playlist(
+ video_channel: VideoChannel, playlist_data: dict
+) -> LearningResource | None:
"""
Load a video playlist into the database
@@ -1528,13 +1543,38 @@ def load_playlist(video_channel: VideoChannel, playlist_data: dict) -> LearningR
playlist_data (dict): the video playlist
Returns:
- LearningResource: the created or updated playlist resource
+ LearningResource | None: the created or updated playlist resource,
+ or None if create_videos is False and not all videos could be matched
"""
playlist_id = playlist_data.pop("playlist_id")
thumbnail_data = playlist_data.pop("image", None)
videos_data = playlist_data.pop("videos", [])
offered_bys_data = playlist_data.pop("offered_by", None)
+ create_videos = playlist_data.pop("create_videos", True)
+
+ if create_videos:
+ video_resources = load_videos(videos_data)
+ else:
+ video_resources = find_existing_videos(videos_data)
+
+ if video_resources is None:
+ log.info(
+ "Skipping playlist %s: not all videos have matching video. ",
+ playlist_id,
+ )
+
+ existing_resource = LearningResource.objects.filter(
+ readable_id=playlist_id,
+ resource_type=LearningResourceType.video_playlist.name,
+ published=True,
+ ).first()
+ if existing_resource:
+ existing_resource.published = False
+ existing_resource.save()
+ update_index(existing_resource, newly_created=False)
+ return None
+
playlist_data["resource_category"] = LearningResourceType.video_playlist.value
with transaction.atomic():
if thumbnail_data:
@@ -1557,10 +1597,13 @@ def load_playlist(video_channel: VideoChannel, playlist_data: dict) -> LearningR
)
load_offered_by(playlist_resource, offered_bys_data)
- video_resources = load_videos(videos_data)
load_topics(playlist_resource, most_common_topics(video_resources))
+
+ # Unpublish any videos from youtube that are no longer in the playlist
+ # OCW content file videos are removed from the playlist but not unpublished
unpublished_videos = playlist_resource.resources.filter(
resource_type=LearningResourceType.video.name,
+ platform=PlatformType.youtube.name,
published=True,
).exclude(id__in=[video.id for video in video_resources])
unpublished_video_ids = list(unpublished_videos.values_list("id", flat=True))
@@ -1584,6 +1627,42 @@ def load_playlist(video_channel: VideoChannel, playlist_data: dict) -> LearningR
return playlist_resource
+def find_existing_videos(videos_data: list[dict]) -> list[LearningResource] | None:
+ """
+ Find existing video resources matching the given video data.
+
+ Args:
+ videos_data (list of dict): list of video data dicts
+
+ Returns:
+ list of LearningResource | None: list of existing video resources,
+ or None if any video is not found
+ """
+ videos = []
+ for video_data in videos_data:
+ youtube_id = video_data.get("youtube_id")
+ if not youtube_id:
+ return None
+ learning_resource = (
+ ContentFile.objects.filter(
+ youtube_id=youtube_id,
+ direct_learning_resource__resource_type=LearningResourceType.video.name,
+ direct_learning_resource__published=True,
+ )
+ .select_related("direct_learning_resource")
+ .first()
+ )
+ if learning_resource:
+ videos.append(learning_resource.direct_learning_resource)
+ else:
+ log.info(
+ "No existing published video resource found for youtube_id=%s",
+ youtube_id,
+ )
+ return None
+ return videos
+
+
def load_playlists(
video_channel: VideoChannel, playlists_data: iter
) -> list[LearningResource]:
@@ -1599,7 +1678,12 @@ def load_playlists(
the created or updated LearningResources for the playlists
"""
playlists = [
- load_playlist(video_channel, playlist_data) for playlist_data in playlists_data
+ playlist
+ for playlist in (
+ load_playlist(video_channel, playlist_data)
+ for playlist_data in playlists_data
+ )
+ if playlist is not None
]
playlist_ids = [playlist.id for playlist in playlists]
diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py
index ebbc30d4f9..c32198d4b4 100644
--- a/learning_resources/etl/loaders_test.py
+++ b/learning_resources/etl/loaders_test.py
@@ -34,6 +34,7 @@
from learning_resources.etl.loaders import (
ProgramLoadResult,
calculate_completeness,
+ find_existing_videos,
load_content_file,
load_content_files,
load_course,
@@ -2094,6 +2095,7 @@ def test_load_video(mocker, video_exists, is_published, pass_topics):
"offered_by": {"code": offered_by.code},
"published": is_published,
"video": {"duration": video_resource.video.duration},
+ "youtube_id": "youtube123",
}
if pass_topics:
props["topics"] = expected_topics
@@ -2212,6 +2214,200 @@ def test_load_playlist(mocker, playlist_exists, mock_get_similar_topics_qdrant):
)
+def test_load_playlist_removed_videos_unpublished(
+ mocker, mock_get_similar_topics_qdrant
+):
+ """Test that load_playlist unpublishes removed YouTube videos but not OCW videos"""
+ expected_topics = [{"name": "Biology"}]
+ LearningResourceTopicFactory.create(name="Biology")
+ mocker.patch(
+ "learning_resources.etl.loaders.most_common_topics",
+ return_value=expected_topics,
+ )
+ mock_bulk_unpublish = mocker.patch(
+ "learning_resources.etl.loaders.bulk_resources_unpublished_actions",
+ )
+
+ channel = VideoChannelFactory.create()
+ ocw_platform = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name)
+
+ # Create an existing playlist with both a YouTube and OCW video
+ playlist = VideoPlaylistFactory.create(channel=channel).learning_resource
+
+ youtube_video = VideoFactory.create().learning_resource
+ ocw_video = LearningResourceFactory.create(
+ resource_type=LearningResourceType.video.name,
+ platform=ocw_platform,
+ published=True,
+ )
+
+ playlist.resources.add(
+ youtube_video,
+ through_defaults={
+ "relation_type": LearningResourceRelationTypes.PLAYLIST_VIDEOS,
+ "position": 0,
+ },
+ )
+ playlist.resources.add(
+ ocw_video,
+ through_defaults={
+ "relation_type": LearningResourceRelationTypes.PLAYLIST_VIDEOS,
+ "position": 1,
+ },
+ )
+
+ # Load playlist with new videos (excluding the old youtube and ocw videos)
+ new_video_resources = [
+ video.learning_resource for video in VideoFactory.build_batch(2)
+ ]
+ videos_data = [
+ {
+ **model_to_dict(video, exclude=non_transformable_attributes),
+ "platform": PlatformType.youtube.name,
+ "offered_by": {"code": LearningResourceOfferorFactory.create().code},
+ }
+ for video in new_video_resources
+ ]
+
+ props = {
+ **model_to_dict(playlist, exclude=["topics", *non_transformable_attributes]),
+ "platform": PlatformType.youtube.name,
+ "offered_by": {"code": LearningResourceOfferorFactory.create().code},
+ "playlist_id": playlist.readable_id,
+ "url": f"https://youtube.com/playlist?list={playlist.readable_id}",
+ "image": {
+ "url": f"https://i.ytimg.com/vi/{playlist.readable_id}/hqdefault.jpg",
+ "alt": playlist.title,
+ },
+ "videos": videos_data,
+ }
+
+ load_playlist(channel, props)
+
+ # YouTube video should be unpublished
+ youtube_video.refresh_from_db()
+ assert youtube_video.published is False
+
+ # OCW video should NOT be unpublished
+ ocw_video.refresh_from_db()
+ assert ocw_video.published is True
+
+ # bulk_resources_unpublished_actions called only with the youtube video
+ mock_bulk_unpublish.assert_called_once_with(
+ [youtube_video.id],
+ LearningResourceType.video.name,
+ )
+
+
+@pytest.mark.parametrize("all_videos_exist", [True, False])
+def test_find_existing_videos(all_videos_exist):
+ """Test that find_existing_videos returns matching video resources or None"""
+ video_resource_1 = VideoFactory.create().learning_resource
+ video_resource_2 = VideoFactory.create().learning_resource
+ run = LearningResourceRunFactory.create()
+
+ # Create content files with youtube_id linked to video resources
+ ContentFileFactory.create(
+ run=run,
+ youtube_id="yt_id_1",
+ direct_learning_resource=video_resource_1,
+ )
+ ContentFileFactory.create(
+ run=run,
+ youtube_id="yt_id_2",
+ direct_learning_resource=video_resource_2,
+ )
+
+ if all_videos_exist:
+ videos_data = [
+ {"youtube_id": "yt_id_1"},
+ {"youtube_id": "yt_id_2"},
+ ]
+ result = find_existing_videos(videos_data)
+ assert result is not None
+ assert len(result) == 2
+ assert result[0].id == video_resource_1.id
+ assert result[1].id == video_resource_2.id
+ else:
+ videos_data = [
+ {"youtube_id": "yt_id_1"},
+ {"youtube_id": "missing_yt_id"},
+ ]
+ result = find_existing_videos(videos_data)
+ assert result is None
+
+
+def test_find_existing_videos_empty_list():
+ """Test that find_existing_videos returns an empty list for empty input"""
+ result = find_existing_videos([])
+ assert result == []
+
+
+@pytest.mark.parametrize("all_videos_exist", [True, False])
+def test_load_playlist_create_videos_false(
+ mocker, all_videos_exist, mock_get_similar_topics_qdrant
+):
+ """Test load_playlist with create_videos=False uses find_existing_videos"""
+ expected_topics = [{"name": "Biology"}, {"name": "Physics"}]
+ [
+ LearningResourceTopicFactory.create(name=topic["name"])
+ for topic in expected_topics
+ ]
+ mocker.patch(
+ "learning_resources.etl.loaders.most_common_topics",
+ return_value=expected_topics,
+ )
+ mocker.patch(
+ "learning_resources.etl.loaders.bulk_resources_unpublished_actions",
+ )
+ mock_update_index = mocker.patch(
+ "learning_resources.etl.loaders.update_index",
+ )
+
+ channel = VideoChannelFactory.create()
+ run = LearningResourceRunFactory.create()
+
+ if all_videos_exist:
+ video_resource = VideoFactory.create().learning_resource
+ ContentFileFactory.create(
+ run=run,
+ youtube_id="yt_existing",
+ direct_learning_resource=video_resource,
+ )
+ videos_data = [{"youtube_id": "yt_existing"}]
+ else:
+ videos_data = [{"youtube_id": "yt_missing"}]
+
+ playlist = VideoPlaylistFactory.build().learning_resource
+ props = {
+ **model_to_dict(
+ playlist,
+ exclude=["topics", "offered_by", *non_transformable_attributes],
+ ),
+ "platform": PlatformType.youtube.name,
+ "playlist_id": playlist.readable_id,
+ "url": f"https://youtube.com/playlist?list={playlist.readable_id}",
+ "image": {
+ "url": f"https://i.ytimg.com/vi/{playlist.readable_id}/hqdefault.jpg",
+ "alt": playlist.title,
+ },
+ "videos": videos_data,
+ "create_videos": False,
+ }
+
+ result = load_playlist(channel, props)
+
+ if all_videos_exist:
+ assert isinstance(result, LearningResource)
+ assert result.resources.count() == 1
+ assert result.video_playlist.channel == channel
+ else:
+ assert result is None
+ # update_index should not be called for the playlist
+ # (only possibly for unpublishing an existing one)
+ mock_update_index.assert_not_called()
+
+
def test_load_playlists_unpublish(mocker):
"""Test load_playlists when a video/playlist gets unpublished"""
mocker.patch("learning_resources_search.tasks.bulk_deindex_learning_resources.si")
@@ -2985,12 +3181,25 @@ def test_load_documents(mocker, climate_platform, mock_get_similar_topics_qdrant
@pytest.mark.django_db
-def test_load_learning_materials(mocker):
+@pytest.mark.parametrize(
+ ("create_ocw_learning_materials", "show_ocw_lecture_videos"),
+ [
+ (True, False),
+ (False, True),
+ (False, False),
+ ],
+)
+def test_load_learning_materials(
+ mocker, settings, create_ocw_learning_materials, show_ocw_lecture_videos
+):
"""
Test that load_learning_materials runs load_learning_material
- if a ocw content file has content_tags in OCW_COURSE_CONTENT_CATEGORY_MAPPING
+ based on CREATE_OCW_LEARNING_MATERIALS and SHOW_OCW_LECTURE_VIDEOS settings
"""
+ settings.CREATE_OCW_LEARNING_MATERIALS = create_ocw_learning_materials
+ settings.SHOW_OCW_LECTURE_VIDEOS = show_ocw_lecture_videos
+
ocw = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name)
ocw_course = CourseFactory.create(
platform=ocw.code,
@@ -3000,6 +3209,7 @@ def test_load_learning_materials(mocker):
relevant_content_tag = LearningResourceContentTagFactory.create(
name="Programming Assignments"
)
+ lecture_video_tag = LearningResourceContentTagFactory.create(name="Lecture Videos")
irrelevant_content_tag = LearningResourceContentTagFactory.create(name="Syllabus")
no_longer_relevant_resource = LearningResourceFactory.create()
@@ -3010,6 +3220,12 @@ def test_load_learning_materials(mocker):
content_type=CONTENT_TYPE_FILE,
)
+ lecture_video_content_file = ContentFileFactory.create(
+ run=ocw_course.learning_resource.runs.first(),
+ content_tags=[lecture_video_tag],
+ content_type=CONTENT_TYPE_FILE,
+ )
+
other_content_file = ContentFileFactory.create(
run=ocw_course.learning_resource.runs.first(),
content_tags=[irrelevant_content_tag],
@@ -3025,32 +3241,53 @@ def test_load_learning_materials(mocker):
course_run=ocw_course.learning_resource.runs.first(),
content_file_ids=[
learning_material_content_file.id,
+ lecture_video_content_file.id,
other_content_file.id,
],
)
- assert load_learning_materials_spy.call_count == 1
- load_learning_materials_spy.assert_called_with(
- ocw_course.learning_resource.runs.first(),
- learning_material_content_file,
- {"Programming Assignments"},
- )
-
- learning_material_content_file.refresh_from_db()
-
- learning_material = learning_material_content_file.direct_learning_resource
-
- resource_relationships = ocw_course.learning_resource.children.all()
-
- assert resource_relationships.count() == 1
-
- assert resource_relationships.first().child.id == learning_material.id
- assert (
- resource_relationships.first().relation_type
- == LearningResourceRelationTypes.COURSE_LEARNING_MATERIALS.value
- )
-
- assert mock_index.call_count == 2
+ if create_ocw_learning_materials:
+ # Both programming assignments and lecture videos are promoted
+ assert load_learning_materials_spy.call_count == 2
+ load_learning_materials_spy.assert_any_call(
+ ocw_course.learning_resource.runs.first(),
+ learning_material_content_file,
+ {"Programming Assignments"},
+ )
+ load_learning_materials_spy.assert_any_call(
+ ocw_course.learning_resource.runs.first(),
+ lecture_video_content_file,
+ {"Lecture Videos"},
+ )
+ resource_relationships = ocw_course.learning_resource.children.all()
+ assert resource_relationships.count() == 2
+ # 2 load_learning_material calls (each calls update_index)
+ # + 1 for unpublishing no_longer_relevant_resource
+ assert mock_index.call_count == 3
+ elif show_ocw_lecture_videos:
+ # Only lecture videos are promoted
+ assert load_learning_materials_spy.call_count == 1
+ load_learning_materials_spy.assert_called_with(
+ ocw_course.learning_resource.runs.first(),
+ lecture_video_content_file,
+ {"Lecture Videos"},
+ )
+ resource_relationships = ocw_course.learning_resource.children.all()
+ assert resource_relationships.count() == 1
+ lecture_video_content_file.refresh_from_db()
+ assert (
+ resource_relationships.first().child.id
+ == lecture_video_content_file.direct_learning_resource.id
+ )
+ # 1 load_learning_material call + 1 for unpublishing no_longer_relevant_resource
+ assert mock_index.call_count == 2
+ else:
+ # Nothing is promoted
+ assert load_learning_materials_spy.call_count == 0
+ resource_relationships = ocw_course.learning_resource.children.all()
+ assert resource_relationships.count() == 0
+ # 1 for unpublishing no_longer_relevant_resource
+ assert mock_index.call_count == 1
no_longer_relevant_resource.refresh_from_db()
assert no_longer_relevant_resource.published is False
diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py
index 78cadb436a..5028ca72e7 100644
--- a/learning_resources/etl/pipelines.py
+++ b/learning_resources/etl/pipelines.py
@@ -160,7 +160,10 @@ def ocw_courses_etl(
calc_completeness=True,
)
- if settings.CREATE_OCW_LEARNING_MATERIALS:
+ if (
+ settings.CREATE_OCW_LEARNING_MATERIALS
+ or settings.SHOW_OCW_LECTURE_VIDEOS
+ ):
loaders.load_learning_materials(course_run, content_file_ids)
else:
log.info("No course data found for %s", url_path)
diff --git a/learning_resources/etl/youtube.py b/learning_resources/etl/youtube.py
index 755655fbe4..d98b49d8ea 100644
--- a/learning_resources/etl/youtube.py
+++ b/learning_resources/etl/youtube.py
@@ -192,7 +192,11 @@ def extract_playlist_items(
def _extract_playlists(
- youtube_client: Resource, request: BatchHttpRequest, playlist_configs: dict
+ youtube_client: Resource,
+ request: BatchHttpRequest,
+ playlist_configs: dict,
+ *,
+ create_videos_channel_setting: bool,
) -> Generator[tuple, None, None]:
"""
Extract a list of playlists
@@ -201,6 +205,8 @@ def _extract_playlists(
youtube_client (Resource): Youtube api client
request (BatchHttpRequest): Youtube api BatchHttpRequest object
playlist_configs (dict): dict of playlist configurations
+ create_videos_channel_setting (bool): whether the channel config
+ is to create videos from youtube data
Returns:
A generator that yields playlist data
@@ -216,15 +222,20 @@ def _extract_playlists(
playlist_id = playlist_data["id"]
if playlist_id in playlist_configs:
playlist_config = playlist_configs[playlist_id]
+ create_videos = playlist_config.get(
+ "create_videos", create_videos_channel_setting
+ )
else:
playlist_config = playlist_configs.get(
WILDCARD_PLAYLIST_ID, {"ignore": True}
)
+ create_videos = create_videos_channel_setting
if not playlist_config.get("ignore", False):
yield (
playlist_data,
extract_playlist_items(youtube_client, playlist_id),
+ create_videos,
)
request = youtube_client.playlists().list_next(request, response)
@@ -237,7 +248,11 @@ def _extract_playlists(
def extract_playlists(
- youtube_client: Resource, playlist_configs: list[dict], channel_id: str
+ youtube_client: Resource,
+ playlist_configs: list[dict],
+ channel_id: str,
+ *,
+ create_videos_channel_setting: bool,
) -> Generator[tuple, None, None]:
"""
Extract a list of playlists for a channel
@@ -245,6 +260,8 @@ def extract_playlists(
youtube_client (object): Youtube api client
playlist_configs (list of dict): list of playlist configurations
channel_id (str): youtube's id for the channel
+ create_videos_channel_setting (bool): whether the channel config
+ is to create videos from youtube data
Returns:
A generator that yields playlist data
"""
@@ -272,7 +289,12 @@ def extract_playlists(
)
for request in requests:
- yield from _extract_playlists(youtube_client, request, playlist_configs_by_id)
+ yield from _extract_playlists(
+ youtube_client,
+ request,
+ playlist_configs_by_id,
+ create_videos_channel_setting=create_videos_channel_setting,
+ )
def extract_channels(
@@ -311,11 +333,13 @@ def extract_channels(
channel_id = channel_data["id"]
channel_config = channel_configs_by_ids[channel_id]
offered_by = channel_config.get("offered_by", None)
+ create_videos = channel_config.get("create_videos", True)
playlist_configs = channel_config.get("playlists", [])
-
- # if we hit any error on a playlist, we simply abort
playlists = extract_playlists(
- youtube_client, playlist_configs, channel_id
+ youtube_client,
+ playlist_configs,
+ channel_id,
+ create_videos_channel_setting=create_videos,
)
yield (offered_by, channel_data, playlists)
@@ -481,11 +505,16 @@ def transform_video(video_data: dict, offered_by_code: str) -> dict:
"duration": video_data["contentDetails"]["duration"],
},
"availability": Availability.anytime.name,
+ "youtube_id": video_data["id"],
}
def transform_playlist(
- playlist_data: dict, videos: Generator[dict, None, None], offered_by_code: str
+ playlist_data: dict,
+ videos: Generator[dict, None, None],
+ offered_by_code: str,
+ *,
+ create_videos: bool,
) -> dict:
"""
Transform a playlist into our normalized data
@@ -494,6 +523,8 @@ def transform_playlist(
playlist_data (dict): the extracted playlist data
videos (generator): generator for data for the playlist's videos
offered_by_code (str): the offered_by code for this playlist
+ create_videos (bool): whether to create videos from this playlist
+ or match to existing videos without creating new ones
Returns:
dict: normalized playlist data
"""
@@ -515,6 +546,7 @@ def transform_playlist(
"alt": playlist_data["snippet"]["title"],
},
"availability": Availability.anytime.name,
+ "create_videos": create_videos,
}
@@ -539,8 +571,10 @@ def transform(extracted_channels: iter) -> Generator[dict, None, None]:
"published": True,
# intentional generator expression
"playlists": (
- transform_playlist(playlist, videos, offered_by)
- for playlist, videos in playlists
+ transform_playlist(
+ playlist, videos, offered_by, create_videos=create_videos
+ )
+ for playlist, videos, create_videos in playlists
),
}
diff --git a/learning_resources/etl/youtube_test.py b/learning_resources/etl/youtube_test.py
index ad3300351e..44d30f53ad 100644
--- a/learning_resources/etl/youtube_test.py
+++ b/learning_resources/etl/youtube_test.py
@@ -176,18 +176,18 @@ def extracted_and_transformed_values(youtube_api_responses):
(
OfferedBy.ocw.name,
channels_list[0]["items"][0],
- [(playlists_list[0]["items"][0], ocw_videos)],
+ [(playlists_list[0]["items"][0], ocw_videos, True)],
),
(
OfferedBy.mitx.name,
channels_list[0]["items"][1],
- [(playlists_list[1]["items"][0], mitx_videos)],
+ [(playlists_list[1]["items"][0], mitx_videos, True)],
),
(
"csail",
channels_list[0]["items"][2],
[
- (playlists_list[2]["items"][1], csail_videos),
+ (playlists_list[2]["items"][1], csail_videos, True),
],
),
]
@@ -236,11 +236,13 @@ def extracted_and_transformed_values(youtube_api_responses):
"video": {
"duration": video["contentDetails"]["duration"],
},
+ "youtube_id": video["id"],
}
for video in videos
],
+ "create_videos": create_videos,
}
- for playlist, videos in playlists
+ for playlist, videos, create_videos in playlists
],
}
for offered_by, channel, playlists in extracted
@@ -263,8 +265,8 @@ def _resolve_extracted_channels(channels):
def _resolve_extracted_playlist(playlist):
"""Resolve a playlist and its nested generators"""
- playlist_data, videos = playlist
- return (playlist_data, list(videos))
+ playlist_data, videos, create_videos = playlist
+ return (playlist_data, list(videos), create_videos)
@pytest.fixture
@@ -423,10 +425,54 @@ def test_extract_playlists_errors(
request = Mock(execute=Mock(side_effect=error(Mock(), b"")))
if raised_exception:
with pytest.raises(raised_exception) as err:
- list(youtube._extract_playlists(mock_youtube_client, request, {})) # noqa: SLF001
+ list(
+ youtube._extract_playlists( # noqa: SLF001
+ mock_youtube_client, request, {}, create_videos_channel_setting=True
+ )
+ )
assert message in str(err)
+@pytest.mark.parametrize(
+ ("channel_create_videos", "playlist_create_videos", "expected"),
+ [
+ (True, None, True),
+ (False, None, False),
+ (True, False, False),
+ (False, True, True),
+ ],
+)
+def test_extract_playlists_create_videos(
+ mock_youtube_client,
+ channel_create_videos,
+ playlist_create_videos,
+ expected,
+):
+ """Test that create_videos is resolved from playlist config or channel setting"""
+ playlist_id = "PL_test_123"
+ playlist_data = {"id": playlist_id, "snippet": {"title": "Test"}}
+ request = Mock(execute=Mock(return_value={"items": [playlist_data]}))
+ mock_youtube_client.return_value.playlists.return_value.list_next.return_value = (
+ None
+ )
+
+ playlist_config = {"id": playlist_id}
+ if playlist_create_videos is not None:
+ playlist_config["create_videos"] = playlist_create_videos
+
+ results = list(
+ youtube._extract_playlists( # noqa: SLF001
+ mock_youtube_client.return_value,
+ request,
+ {playlist_id: playlist_config},
+ create_videos_channel_setting=channel_create_videos,
+ )
+ )
+ assert len(results) == 1
+ _, _, create_videos = results[0]
+ assert create_videos is expected
+
+
@pytest.mark.django_db
@pytest.mark.parametrize(
("error", "raised_exception", "message"),
@@ -462,6 +508,7 @@ def test_transform_playlist(
extracted[0][2][0][0],
extracted[0][2][0][1],
OfferedBy.ocw.name,
+ create_videos=True,
)
assert {**result, "videos": list(result["videos"])} == {
**transformed[0]["playlists"][0],
diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py
index f11361390e..3063ad5bf4 100644
--- a/learning_resources/tasks.py
+++ b/learning_resources/tasks.py
@@ -402,10 +402,14 @@ def update_ocw_learning_material_resources(self): # noqa: ARG001
ocw courses or content files
"""
- if not settings.CREATE_OCW_LEARNING_MATERIALS:
+ if (
+ not settings.CREATE_OCW_LEARNING_MATERIALS
+ and not settings.SHOW_OCW_LECTURE_VIDEOS
+ ):
message = (
- "CREATE_OCW_LEARNING_MATERIALS flag is set to False."
- " update_ocw_learning_material_resources cannont run"
+ "update_ocw_learning_material_resources cannot run because "
+ "CREATE_OCW_LEARNING_MATERIALS and SHOW_OCW_LECTURE_VIDEOS "
+ "flags are set to False."
)
raise RuntimeError(message)
diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py
index d5e10ce417..100099f532 100644
--- a/learning_resources_search/api.py
+++ b/learning_resources_search/api.py
@@ -480,23 +480,26 @@ def generate_filter_clauses(search_params):
if search_params.get("endpoint") == LEARNING_RESOURCE and not search_params.get(
"show_ocw_files"
):
+ ocw_should = [
+ {
+ "bool": {
+ "must_not": [
+ {
+ "nested": {
+ "path": "platform",
+ "query": {"term": {"platform.code": "ocw"}},
+ }
+ }
+ ]
+ }
+ },
+ {"term": {"resource_type": "course"}},
+ ]
+ if settings.SHOW_OCW_LECTURE_VIDEOS:
+ ocw_should.append({"term": {"resource_category": "Lecture Videos"}})
ocw_clause = {
"bool": {
- "should": [
- {
- "bool": {
- "must_not": [
- {
- "nested": {
- "path": "platform",
- "query": {"term": {"platform.code": "ocw"}},
- }
- }
- ]
- }
- },
- {"term": {"resource_type": "course"}},
- ],
+ "should": ocw_should,
"minimum_should_match": 1,
}
}
diff --git a/main/settings.py b/main/settings.py
index b0f609c9bd..669435030c 100644
--- a/main/settings.py
+++ b/main/settings.py
@@ -35,7 +35,7 @@
from main.settings_pluggy import * # noqa: F403
from openapi.settings_spectacular import open_spectacular_settings
-VERSION = "0.65.2"
+VERSION = "0.65.3"
log = logging.getLogger()
@@ -934,3 +934,13 @@ def get_all_config_keys():
# Hubspot settings
MITOL_HUBSPOT_API_PRIVATE_TOKEN = get_string("MITOL_HUBSPOT_API_PRIVATE_TOKEN", None)
+
+# Create all learning material resources for OCW courses
+# Learning material resources are behind show_ocw_files flag in search
+CREATE_OCW_LEARNING_MATERIALS = get_bool("CREATE_OCW_LEARNING_MATERIALS", default=False)
+# Create just lecture video resources for OCW courses.
+# OCW lecture video resources will be displayed in search
+# regardless of show_ocw_files flag
+# If the video config is set to `create_videos`=False ocw videos will be added to
+# playlists
+SHOW_OCW_LECTURE_VIDEOS = get_bool("SHOW_OCW_LECTURE_VIDEOS", default=False)
diff --git a/main/settings_course_etl.py b/main/settings_course_etl.py
index 0e19ce3f93..96ffcfa36a 100644
--- a/main/settings_course_etl.py
+++ b/main/settings_course_etl.py
@@ -145,4 +145,3 @@
"CONTENT_BASE_URL_OLL", "https://openlearninglibrary.mit.edu"
)
CONTENT_BASE_URL_EDX = get_string("CONTENT_BASE_URL_EDX", "https://courses.edx.org")
-CREATE_OCW_LEARNING_MATERIALS = get_bool("CREATE_OCW_LEARNING_MATERIALS", default=False)
diff --git a/openapi/specs/v0.yaml b/openapi/specs/v0.yaml
index 2a65b6cd16..b18dda0d69 100644
--- a/openapi/specs/v0.yaml
+++ b/openapi/specs/v0.yaml
@@ -923,6 +923,8 @@ paths:
name: limit
schema:
type: integer
+ maximum: 1000
+ minimum: 1
description: Number of results to return per page
- in: query
name: offered_by
@@ -936,6 +938,8 @@ paths:
name: offset
schema:
type: integer
+ maximum: 1000
+ minimum: 0
description: The initial index from which to return the results
- in: query
name: platform
@@ -1247,6 +1251,8 @@ paths:
name: limit
schema:
type: integer
+ maximum: 1000
+ minimum: 1
description: Number of results to return per page
- in: query
name: ocw_topic
@@ -1286,6 +1292,8 @@ paths:
name: offset
schema:
type: integer
+ maximum: 1000
+ minimum: 0
description: The initial index from which to return the results
- in: query
name: platform
diff --git a/vector_search/constants.py b/vector_search/constants.py
index 5aa7223a57..bf2deb09de 100644
--- a/vector_search/constants.py
+++ b/vector_search/constants.py
@@ -139,3 +139,6 @@
TOPICS_COLLECTION_NAME: QDRANT_TOPICS_PARAM_MAP,
CONTENT_FILES_COLLECTION_NAME: QDRANT_CONTENT_FILE_PARAM_MAP,
}
+
+# Maximum value of offset + limit accepted by paginated vector search
+MAX_RESULT_WINDOW = 1000
diff --git a/vector_search/serializers.py b/vector_search/serializers.py
index 47aee71862..6e66a7687a 100644
--- a/vector_search/serializers.py
+++ b/vector_search/serializers.py
@@ -20,12 +20,25 @@
SearchResponseSerializer,
)
from vector_search.constants import (
+ MAX_RESULT_WINDOW,
QDRANT_CONTENT_FILE_PARAM_MAP,
QDRANT_LEARNING_RESOURCE_SORTBY_FIELDS,
QDRANT_RESOURCE_PARAM_MAP,
)
+def _validate_result_window(attrs):
+ offset = attrs.get("offset") or 0
+ limit = attrs.get("limit") or 0
+ if offset + limit > MAX_RESULT_WINDOW:
+ msg = (
+ f"offset + limit must not exceed {MAX_RESULT_WINDOW}. "
+ "Use more specific filters to narrow results."
+ )
+ raise serializers.ValidationError(msg)
+ return attrs
+
+
class LearningResourcesSearchFiltersSerializer(serializers.Serializer):
"""
Shared filter fields for Qdrant-backed learning resource queries.
@@ -195,10 +208,16 @@ class LearningResourcesVectorSearchRequestSerializer(
)
q = serializers.CharField(required=False, help_text="The search text")
offset = serializers.IntegerField(
- required=False, help_text="The initial index from which to return the results"
+ required=False,
+ help_text="The initial index from which to return the results",
+ min_value=0,
+ max_value=MAX_RESULT_WINDOW,
)
limit = serializers.IntegerField(
- required=False, help_text="Number of results to return per page"
+ required=False,
+ help_text="Number of results to return per page",
+ min_value=1,
+ max_value=MAX_RESULT_WINDOW,
)
hybrid_search = serializers.BooleanField(
required=False,
@@ -206,6 +225,9 @@ class LearningResourcesVectorSearchRequestSerializer(
help_text="Whether to use a hybrid search",
)
+ def validate(self, attrs):
+ return _validate_result_window(attrs)
+
class LearningResourcesVectorSearchResponseSerializer(SearchResponseSerializer):
"""
@@ -233,10 +255,16 @@ class ContentFileVectorSearchRequestSerializer(serializers.Serializer):
q = serializers.CharField(required=False, help_text="The search text")
offset = serializers.IntegerField(
- required=False, help_text="The initial index from which to return the results"
+ required=False,
+ help_text="The initial index from which to return the results",
+ min_value=0,
+ max_value=MAX_RESULT_WINDOW,
)
limit = serializers.IntegerField(
- required=False, help_text="Number of results to return per page"
+ required=False,
+ help_text="Number of results to return per page",
+ min_value=1,
+ max_value=MAX_RESULT_WINDOW,
)
aggregation_choices = [
(key, key.replace("_", " ").title()) for key in QDRANT_CONTENT_FILE_PARAM_MAP
@@ -315,6 +343,9 @@ class ContentFileVectorSearchRequestSerializer(serializers.Serializer):
help_text="Whether to use a hybrid search",
)
+ def validate(self, attrs):
+ return _validate_result_window(attrs)
+
class ContentFileVectorSearchResponseSerializer(SearchResponseSerializer):
"""
diff --git a/vector_search/serializers_test.py b/vector_search/serializers_test.py
index c7e61bb9cd..14266cc01b 100644
--- a/vector_search/serializers_test.py
+++ b/vector_search/serializers_test.py
@@ -1,4 +1,6 @@
+from vector_search.constants import MAX_RESULT_WINDOW
from vector_search.serializers import (
+ ContentFileVectorSearchRequestSerializer,
LearningResourcesSearchFiltersSerializer,
LearningResourcesVectorSearchRequestSerializer,
)
@@ -37,3 +39,47 @@ def test_vector_search_request_serializer_inherits_filter_fields():
assert "platform" in fields
assert "q" in fields
assert "hybrid_search" in fields
+
+
+def test_vector_search_result_window_validation():
+ """Test that the result window (offset + limit) is validated."""
+ # Valid window
+ data = {"offset": 10, "limit": 10}
+ s = LearningResourcesVectorSearchRequestSerializer(data=data)
+ assert s.is_valid(), s.errors
+
+ # Invalid window: offset + limit > MAX_RESULT_WINDOW
+ data = {"offset": MAX_RESULT_WINDOW, "limit": 1}
+ s = LearningResourcesVectorSearchRequestSerializer(data=data)
+ assert not s.is_valid()
+ assert (
+ f"offset + limit must not exceed {MAX_RESULT_WINDOW}"
+ in s.errors["non_field_errors"][0]
+ )
+
+ # Boundary case: offset + limit == MAX_RESULT_WINDOW
+ data = {"offset": MAX_RESULT_WINDOW - 10, "limit": 10}
+ s = LearningResourcesVectorSearchRequestSerializer(data=data)
+ assert s.is_valid(), s.errors
+
+
+def test_content_file_vector_search_result_window_validation():
+ """Test that the result window (offset + limit) is validated for content files."""
+ # Valid window
+ data = {"offset": 10, "limit": 10}
+ s = ContentFileVectorSearchRequestSerializer(data=data)
+ assert s.is_valid(), s.errors
+
+ # Invalid window: offset + limit > MAX_RESULT_WINDOW
+ data = {"offset": MAX_RESULT_WINDOW, "limit": 1}
+ s = ContentFileVectorSearchRequestSerializer(data=data)
+ assert not s.is_valid()
+ assert (
+ f"offset + limit must not exceed {MAX_RESULT_WINDOW}"
+ in s.errors["non_field_errors"][0]
+ )
+
+ # Boundary case: offset + limit == MAX_RESULT_WINDOW
+ data = {"offset": MAX_RESULT_WINDOW - 10, "limit": 10}
+ s = ContentFileVectorSearchRequestSerializer(data=data)
+ assert s.is_valid(), s.errors
diff --git a/vector_search/views.py b/vector_search/views.py
index de7b4b5d73..f88ac8a1a9 100644
--- a/vector_search/views.py
+++ b/vector_search/views.py
@@ -216,18 +216,29 @@ async def _execute_group_search(self, client, search_params, params):
async def _execute_scroll_search( # noqa: PLR0913
self, client, search_collection, search_filter, limit, offset, order_by
):
- remaining_to_skip = offset
- next_page_offset = None
- search_result = []
-
# Build common scroll kwargs
scroll_kwargs = {
"collection_name": search_collection,
"scroll_filter": search_filter,
"with_vectors": False,
}
+
if order_by:
+ # Qdrant disables pagination (next_page_offset) when order_by
+ # is used. Fetch offset+limit results in one call and slice
+ # on the client side.
scroll_kwargs["order_by"] = self._format_order_by(order_by)
+ scroll_res = await client.scroll(
+ **scroll_kwargs,
+ limit=offset + limit,
+ )
+ page_points, _ = scroll_res
+ return page_points[offset : offset + limit]
+
+ # Standard pagination loop for non-ordered scrolls
+ remaining_to_skip = offset
+ next_page_offset = None
+ search_result = []
while True:
fetch_size = min(max(remaining_to_skip, limit), 1000)
@@ -295,6 +306,13 @@ async def async_vector_search( # noqa: PLR0913
search_result = await self._execute_group_search(
client, search_params, params
)
+ elif order_by:
+ # Qdrant's query_points does not support offset with
+ # OrderByQuery. Fetch offset+limit results and slice
+ # manually on the client side.
+ search_params["limit"] = offset + limit
+ result_obj = await client.query_points(**search_params)
+ search_result = result_obj.points[offset : offset + limit]
else:
search_params["offset"] = offset
result_obj = await client.query_points(**search_params)
diff --git a/vector_search/views_test.py b/vector_search/views_test.py
index a166362b7e..9bf9b121d9 100644
--- a/vector_search/views_test.py
+++ b/vector_search/views_test.py
@@ -421,3 +421,101 @@ def test_vector_search_sortby_parameter(mocker, client, query_string, hybrid_sea
assert "order_by" in call_kwargs
assert call_kwargs["order_by"].key == "views"
assert call_kwargs["order_by"].direction == models.Direction.DESC
+
+
+def test_vector_search_sortby_pagination(mocker, client):
+ """Test that sortby with offset uses client-side slicing instead of Qdrant offset.
+
+ Qdrant's query_points does not support offset with OrderByQuery, so
+ we must fetch offset+limit results and slice on the client side.
+ """
+
+ mock_qdrant = mocker.patch(
+ "qdrant_client.AsyncQdrantClient", return_value=mocker.AsyncMock()
+ )()
+
+ # Create 80 mock points to simulate a large result set
+ mock_points = []
+ for i in range(80):
+ mock_point = mocker.MagicMock()
+ mock_point.payload = {"readable_id": f"resource-{i}"}
+ mock_points.append(mock_point)
+
+ mock_result = mocker.MagicMock()
+ mock_result.points = mock_points
+ mock_qdrant.query_points = mocker.AsyncMock(return_value=mock_result)
+ mock_qdrant.scroll = mocker.AsyncMock(return_value=([], None))
+ mock_qdrant.count = mocker.AsyncMock(return_value=CountResult(count=100))
+ mocker.patch(
+ "vector_search.views.async_qdrant_client",
+ return_value=mock_qdrant,
+ )
+
+ params = {
+ "q": "test",
+ "sortby": "-created_on",
+ "limit": 20,
+ "offset": 60,
+ }
+
+ client.get(
+ reverse("vector_search:v0:vector_learning_resources_search"), data=params
+ )
+
+ call_kwargs = mock_qdrant.query_points.mock_calls[0].kwargs
+
+ # Should request offset+limit results, not just limit
+ assert call_kwargs["limit"] == 80 # 60 + 20
+
+ # Should NOT pass offset to Qdrant when using OrderByQuery
+ assert "offset" not in call_kwargs
+
+
+def test_vector_search_sortby_scroll_pagination(mocker, client):
+ """Test that sortby with offset on scroll (no query) uses client-side slicing.
+
+ Qdrant disables scroll pagination (next_page_offset) when order_by
+ is used. We must fetch offset+limit in one call and slice.
+ """
+
+ mock_qdrant = mocker.patch(
+ "qdrant_client.AsyncQdrantClient", return_value=mocker.AsyncMock()
+ )()
+
+ # Create 80 mock points to simulate a large result set
+ mock_points = []
+ for i in range(80):
+ mock_point = mocker.MagicMock()
+ mock_point.payload = {"readable_id": f"resource-{i}"}
+ mock_points.append(mock_point)
+
+ # scroll returns (points, next_page_offset=None) when order_by is used
+ mock_qdrant.scroll = mocker.AsyncMock(return_value=(mock_points, None))
+ mock_qdrant.query_points = mocker.AsyncMock()
+ mock_qdrant.count = mocker.AsyncMock(return_value=CountResult(count=100))
+ mocker.patch(
+ "vector_search.views.async_qdrant_client",
+ return_value=mock_qdrant,
+ )
+
+ params = {
+ "q": "",
+ "sortby": "-created_on",
+ "limit": 20,
+ "offset": 60,
+ }
+
+ client.get(
+ reverse("vector_search:v0:vector_learning_resources_search"), data=params
+ )
+
+ call_kwargs = mock_qdrant.scroll.mock_calls[0].kwargs
+
+ # Should request offset+limit in a single call
+ assert call_kwargs["limit"] == 80 # 60 + 20
+
+ # Should NOT pass a page offset (no cursor-based pagination with order_by)
+ assert "offset" not in call_kwargs or call_kwargs.get("offset") is None
+
+ # query_points should not be called (no query string)
+ assert mock_qdrant.query_points.call_count == 0