From ed5bdfb72eb16cb7a7c0c60f408b4e69da726987 Mon Sep 17 00:00:00 2001 From: Zaman Afzal Date: Fri, 8 May 2026 11:16:18 +0500 Subject: [PATCH 1/5] feat: Welcome to learn email on new account setup (#3189) * feat: Welcome to learn email --- authentication/api.py | 10 +- authentication/api_test.py | 29 ++++++ main/middleware/apisix_user_test.py | 45 +++++++++ main/settings_pluggy.py | 2 +- main/templates/email/welcome_email.html | 128 ++++++++++++++++++++++++ profiles/plugins.py | 16 +++ profiles/plugins_test.py | 17 ++++ profiles/tasks.py | 41 ++++++++ profiles/tasks_test.py | 112 +++++++++++++++++++++ 9 files changed, 398 insertions(+), 2 deletions(-) create mode 100644 main/templates/email/welcome_email.html create mode 100644 profiles/plugins_test.py create mode 100644 profiles/tasks.py create mode 100644 profiles/tasks_test.py diff --git a/authentication/api.py b/authentication/api.py index 7ac1ca07bc..a8f73ea18f 100644 --- a/authentication/api.py +++ b/authentication/api.py @@ -33,9 +33,17 @@ def create_user(username, email, profile_data=None, user_extra=None): defaults.update({"username": username}) with transaction.atomic(): - user, _ = User.objects.get_or_create(email=email, defaults=defaults) + user, created = User.objects.get_or_create(email=email, defaults=defaults) profile_api.ensure_profile(user, profile_data=profile_data) + if created: + transaction.on_commit( + lambda: user_created_actions( + user=user, + is_new=True, + details=profile_data or {}, + ) + ) return user diff --git a/authentication/api_test.py b/authentication/api_test.py index aef0f9fafe..29ded90065 100644 --- a/authentication/api_test.py +++ b/authentication/api_test.py @@ -37,6 +37,35 @@ def test_create_user(profile_data): assert user.profile.name is None +@pytest.mark.django_db(transaction=True) +def test_create_user_triggers_plugins_for_new_users(mocker): + """create_user should trigger user_created plugins for brand new users.""" + mock_pm = mocker.Mock() + mocker.patch("authentication.api.get_plugin_manager", return_value=mock_pm) + + user = api.create_user("new-user", "new@localhost", {"name": "New User"}) + + mock_pm.hook.user_created.assert_called_once_with( + user=user, + user_data={"profile": {"name": "New User"}}, + ) + + +@pytest.mark.django_db(transaction=True) +def test_create_user_does_not_retrigger_plugins_for_existing_users(mocker): + """create_user should not trigger user_created plugins for existing users.""" + user = UserFactory.create(email="existing@localhost") + mock_pm = mocker.Mock() + mocker.patch("authentication.api.get_plugin_manager", return_value=mock_pm) + + resolved = api.create_user( + "another-username", user.email, {"name": "Existing User"} + ) + + assert resolved.id == user.id + mock_pm.hook.user_created.assert_not_called() + + @pytest.mark.parametrize( "mock_method", ["profiles.api.ensure_profile"], diff --git a/main/middleware/apisix_user_test.py b/main/middleware/apisix_user_test.py index 50cdbdc31c..aed867a296 100644 --- a/main/middleware/apisix_user_test.py +++ b/main/middleware/apisix_user_test.py @@ -84,6 +84,51 @@ def test_get_request_no_posthog_key(mocker, mock_login, settings): mock_posthog_cls.assert_not_called() +@pytest.mark.django_db(transaction=True) +def test_get_request_queues_welcome_email_for_new_sso_user(mocker, mock_login): + """New APISIX users should queue welcome email once.""" + close_old_connections() + mock_delay = mocker.patch("profiles.plugins.send_welcome_email.delay") + mock_request = mocker.Mock( + META={ + "HTTP_X_USERINFO": b64encode(json.dumps(apisix_user_info).encode()), + }, + user=AnonymousUser(), + ) + apisix_middleware = ApisixUserMiddleware(mocker.Mock()) + apisix_middleware.process_request(mock_request) + + user = User.objects.get(email=apisix_user_info["email"]) + mock_login.assert_called_once() + mock_delay.assert_called_once_with(user.id) + + +@pytest.mark.django_db(transaction=True) +def test_get_request_does_not_queue_welcome_email_for_existing_sso_user( + mocker, mock_login +): + """Existing APISIX users should not queue welcome email again.""" + close_old_connections() + existing_user = UserFactory.create( + email=apisix_user_info["email"], + username=apisix_user_info["preferred_username"], + global_id=apisix_user_info["sub"], + ) + mock_delay = mocker.patch("profiles.plugins.send_welcome_email.delay") + mock_request = mocker.Mock( + META={ + "HTTP_X_USERINFO": b64encode(json.dumps(apisix_user_info).encode()), + }, + user=AnonymousUser(), + ) + apisix_middleware = ApisixUserMiddleware(mocker.Mock()) + apisix_middleware.process_request(mock_request) + + existing_user.refresh_from_db() + mock_login.assert_called_once() + mock_delay.assert_not_called() + + @pytest.mark.django_db(transaction=True) def test_get_request_existing_user_no_globalid(mocker, mock_login): """Test that a valid request updates existing user with same email, no global_id.""" diff --git a/main/settings_pluggy.py b/main/settings_pluggy.py index da6aad907f..9b121b402d 100644 --- a/main/settings_pluggy.py +++ b/main/settings_pluggy.py @@ -2,7 +2,7 @@ MITOL_AUTHENTICATION_PLUGINS = get_string( "MITOL_AUTHENTICATION_PLUGINS", - "learning_resources.plugins.FavoritesListPlugin,profiles.plugins.CreateProfilePlugin", + "learning_resources.plugins.FavoritesListPlugin,profiles.plugins.CreateProfilePlugin,profiles.plugins.WelcomeEmailPlugin", ) MITOL_LEARNING_RESOURCES_PLUGINS = get_string( "MITOL_LEARNING_RESOURCES_PLUGINS", diff --git a/main/templates/email/welcome_email.html b/main/templates/email/welcome_email.html new file mode 100644 index 0000000000..a606ecf97b --- /dev/null +++ b/main/templates/email/welcome_email.html @@ -0,0 +1,128 @@ +{% extends "email/email_base.html" %} + +{% block content %} + + +

+ Hi + {{ display_name }}, +

+ + Welcome to MIT Learn. Your account is ready. + +

+ The MIT Learn platform brings together courses, programs and learning + materials from across MIT. With advanced search and AskTIM's AI-powered + guidance, quickly discover the most relevant content for your learning + goals. +

+

+ With your account, you can make MIT Learn your own: +

+ +

+ Use your account to save what matters, follow your interests, and pick up + your learning where you left off. +

+ + + + +
+ Go To Your Dashboard +
+
+

+ The MIT Learn Team +

+

+

+ MIT Learn • 77 Massachusetts Avenue • + Cambridge, MA 02139 • USA +

+ + +{% endblock %} + +{% block footer %}{% endblock %} +{% block footer-address %}{% endblock %} diff --git a/profiles/plugins.py b/profiles/plugins.py index 7faccab4e3..9286f3e28a 100644 --- a/profiles/plugins.py +++ b/profiles/plugins.py @@ -3,6 +3,7 @@ from django.apps import apps from profiles.api import ensure_profile +from profiles.tasks import send_welcome_email class CreateProfilePlugin: @@ -19,3 +20,18 @@ def user_created(self, user, user_data): """ profile_data = user_data.get("profile", {}) ensure_profile(user, profile_data) + + +class WelcomeEmailPlugin: + hookimpl = apps.get_app_config("authentication").hookimpl + + @hookimpl + def user_created(self, user, user_data): # noqa: ARG002 + """ + Send a welcome email when a new user account is created. + + Args: + user(User): the user that was created + user_data(dict): the user data + """ + send_welcome_email.delay(user.id) diff --git a/profiles/plugins_test.py b/profiles/plugins_test.py new file mode 100644 index 0000000000..99a9171652 --- /dev/null +++ b/profiles/plugins_test.py @@ -0,0 +1,17 @@ +"""Tests for profiles plugins.""" + +import pytest + +from main.factories import UserFactory +from profiles.plugins import WelcomeEmailPlugin + + +@pytest.mark.django_db +def test_welcome_email_plugin_user_created(mocker): + """WelcomeEmailPlugin should queue welcome email task on user creation.""" + user = UserFactory.create(email="new.user@example.com", first_name="New") + mocked_delay = mocker.patch("profiles.plugins.send_welcome_email.delay") + + WelcomeEmailPlugin().user_created(user, user_data={}) + + mocked_delay.assert_called_once_with(user.id) diff --git a/profiles/tasks.py b/profiles/tasks.py new file mode 100644 index 0000000000..31764e8b0c --- /dev/null +++ b/profiles/tasks.py @@ -0,0 +1,41 @@ +"""Tasks for profiles.""" + +import logging + +from django.contrib.auth import get_user_model +from django.core.exceptions import ObjectDoesNotExist + +from main.celery import app +from profiles.utils import send_template_email + +log = logging.getLogger(__name__) +User = get_user_model() + + +@app.task +def send_welcome_email(user_id): + """ + Send a welcome email to a user by id. + """ + user = User.objects.filter(id=user_id).first() + if not user: + log.warning("User %s not found for welcome email", user_id) + return + if not user.email: + log.warning("User %s has blank email, skipping welcome email", user_id) + return + + try: + profile_name = user.profile.name + except ObjectDoesNotExist: + profile_name = None + full_name = " ".join( + part for part in [user.first_name, user.last_name] if part + ).strip() + display_name = profile_name or full_name or user.username or "there" + send_template_email( + [user.email], + "MIT Learn - Welcome to MIT Learn", + "email/welcome_email.html", + context={"display_name": display_name}, + ) diff --git a/profiles/tasks_test.py b/profiles/tasks_test.py new file mode 100644 index 0000000000..9f7f01f425 --- /dev/null +++ b/profiles/tasks_test.py @@ -0,0 +1,112 @@ +"""Tests for profile tasks.""" + +import pytest + +from main.factories import UserFactory +from profiles.tasks import send_welcome_email + + +@pytest.mark.django_db +def test_send_welcome_email_sends_template_email(mocker): + """send_welcome_email should send rendered welcome template for valid user.""" + user = UserFactory.create(email="new.user@example.com", first_name="New") + user.profile.name = "Full Name" + user.profile.save() + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(user.id) + + mocked_send.assert_called_once_with( + ["new.user@example.com"], + "MIT Learn - Welcome to MIT Learn", + "email/welcome_email.html", + context={"display_name": "Full Name"}, + ) + + +@pytest.mark.django_db +def test_send_welcome_email_missing_user(mocker): + """send_welcome_email should no-op when user does not exist.""" + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(999999) + + mocked_send.assert_not_called() + + +@pytest.mark.django_db +def test_send_welcome_email_blank_email(mocker): + """send_welcome_email should no-op when user has blank email.""" + user = UserFactory.create(email="") + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(user.id) + + mocked_send.assert_not_called() + + +@pytest.mark.django_db +def test_send_welcome_email_uses_full_name_when_profile_name_missing(mocker): + """Falls back to first+last name if profile.name is missing.""" + user = UserFactory.create( + email="full.name@example.com", + first_name="Full", + last_name="Name", + ) + user.profile.name = None + user.profile.save() + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(user.id) + + mocked_send.assert_called_once_with( + ["full.name@example.com"], + "MIT Learn - Welcome to MIT Learn", + "email/welcome_email.html", + context={"display_name": "Full Name"}, + ) + + +@pytest.mark.django_db +def test_send_welcome_email_uses_username_when_names_missing(mocker): + """Falls back to username when profile/full name are not available.""" + user = UserFactory.create( + email="username.only@example.com", + first_name="", + last_name="", + username="username-only", + ) + user.profile.name = None + user.profile.save() + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(user.id) + + mocked_send.assert_called_once_with( + ["username.only@example.com"], + "MIT Learn - Welcome to MIT Learn", + "email/welcome_email.html", + context={"display_name": "username-only"}, + ) + + +@pytest.mark.django_db +def test_send_welcome_email_handles_missing_profile_relation(mocker): + """Falls back cleanly when the reverse profile relation is missing.""" + user = UserFactory.create( + email="missing.profile@example.com", + first_name="", + last_name="", + username="profile-missing", + ) + user.profile.delete() + mocked_send = mocker.patch("profiles.tasks.send_template_email") + + send_welcome_email(user.id) + + mocked_send.assert_called_once_with( + ["missing.profile@example.com"], + "MIT Learn - Welcome to MIT Learn", + "email/welcome_email.html", + context={"display_name": "profile-missing"}, + ) From ae5d5a06c96e08770f1d6d6d7840ef7aabef6ec7 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 8 May 2026 09:10:05 -0400 Subject: [PATCH 2/5] Both ETL and webhook should populate OVS resource_category when applicable. (#3313) --- learning_resources/etl/loaders.py | 3 --- learning_resources/etl/ovs.py | 7 ++++++- learning_resources/etl/ovs_test.py | 32 ++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index c0ebfa20cb..062e28c52f 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -15,7 +15,6 @@ OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, OCW_COURSE_CONTENT_CATEGORY_MAPPING, VIDEO_CONTENT_CATEGORIES, - VIDEO_SHORT_RESOURCE_CATEGORY, LearningResourceDelivery, LearningResourceRelationTypes, LearningResourceType, @@ -1493,8 +1492,6 @@ def load_ovs_video_from_webhook(video_payload: dict) -> LearningResource | None: if video_data is None: return None collection = video_payload.get("collection") or {} - if collection.get("for_shorts"): - video_data["resource_category"] = VIDEO_SHORT_RESOURCE_CATEGORY with transaction.atomic(): playlist = ( diff --git a/learning_resources/etl/ovs.py b/learning_resources/etl/ovs.py index d471af4f84..35309014c9 100644 --- a/learning_resources/etl/ovs.py +++ b/learning_resources/etl/ovs.py @@ -10,6 +10,7 @@ from django.db.models import QuerySet from learning_resources.constants import ( + VIDEO_SHORT_RESOURCE_CATEGORY, Availability, LearningResourceType, PlatformType, @@ -221,7 +222,7 @@ def transform_video(video_data: dict) -> dict | None: cover_image_url = _get_cover_image_url(video_data) image_data = {"url": cover_image_url} if cover_image_url else None - return { + video_dict = { "readable_id": video_data["key"], "platform": PlatformType.ovs.name, "etl_source": ETLSource.ovs.name, @@ -241,6 +242,10 @@ def transform_video(video_data: dict) -> dict | None: }, } + if (video_data.get("collection") or {}).get("for_shorts", False): + video_dict["resource_category"] = VIDEO_SHORT_RESOURCE_CATEGORY + return video_dict + def transform_collection(collection_data: dict) -> dict: """ diff --git a/learning_resources/etl/ovs_test.py b/learning_resources/etl/ovs_test.py index 9a7a398db6..7d5d582fe3 100644 --- a/learning_resources/etl/ovs_test.py +++ b/learning_resources/etl/ovs_test.py @@ -7,6 +7,7 @@ import requests from learning_resources.constants import ( + VIDEO_SHORT_RESOURCE_CATEGORY, Availability, LearningResourceType, PlatformType, @@ -570,6 +571,37 @@ def test_transform_video_returns_none_without_m3u8(self, sources): assert transform_video(video_data) is None +@pytest.mark.parametrize( + ("collection", "expected_category"), + [ + pytest.param( + {"key": "c1", "for_shorts": True}, + VIDEO_SHORT_RESOURCE_CATEGORY, + id="for_shorts_true", + ), + pytest.param({"key": "c1", "for_shorts": False}, None, id="for_shorts_false"), + pytest.param({"key": "c1"}, None, id="for_shorts_missing"), + pytest.param(None, None, id="collection_none"), + ], +) +def test_transform_video_resource_category(collection, expected_category): + """resource_category is set only when the video's collection has for_shorts=True""" + video_data = { + "key": "test_key", + "title": "Test", + "description": "", + "created_at": "2020-01-01T00:00:00Z", + "sources": [{"src": "https://example.com/video__index.m3u8"}], + "videothumbnail_set": [], + "videosubtitle_set": [], + "cta_link": None, + "duration": 0, + "collection": collection, + } + result = transform_video(video_data) + assert result.get("resource_category") == expected_category + + @pytest.fixture def ovs_platform(): """Create OVS platform for tests.""" From eb9dfb6824af890f34d954c8b6938e0e007ea012 Mon Sep 17 00:00:00 2001 From: Ahtesham Quraish Date: Fri, 8 May 2026 18:12:09 +0500 Subject: [PATCH 3/5] fix: update metadata for video pages (#3310) * fix: update metadata for video pages --------- Co-authored-by: Ahtesham Quraish --- .../main/src/app/video-playlist/[id]/page.tsx | 42 +++++++++++++++---- .../app/video-playlist/detail/[id]/page.tsx | 28 ++++++++++--- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/frontends/main/src/app/video-playlist/[id]/page.tsx b/frontends/main/src/app/video-playlist/[id]/page.tsx index 2f1cafc8eb..6725a09b69 100644 --- a/frontends/main/src/app/video-playlist/[id]/page.tsx +++ b/frontends/main/src/app/video-playlist/[id]/page.tsx @@ -1,7 +1,6 @@ import React from "react" -import type { Metadata } from "next" import { HydrationBoundary, dehydrate } from "@tanstack/react-query" -import { standardizeMetadata } from "@/common/metadata" +import { safeGenerateMetadata, standardizeMetadata } from "@/common/metadata" import { learningResourceQueries, videoPlaylistQueries, @@ -10,17 +9,46 @@ import { getQueryClient } from "@/app/getQueryClient" import VideoPlaylistCollectionPage from "@/app-pages/VideoPlaylistCollectionPage/VideoPlaylistCollectionPage" import { notFound } from "next/navigation" -export const metadata: Metadata = standardizeMetadata({ - title: "Video Playlist", - robots: "noindex, nofollow", -}) +export const generateMetadata = async ( + props: PageProps<"/video-playlist/[id]">, +) => { + const { id } = await props.params + const playlistId = Number(id) + if (!Number.isInteger(playlistId) || playlistId <= 0) { + notFound() + } + const queryClient = getQueryClient() + + return safeGenerateMetadata(async () => { + const [playlist, items] = await Promise.all([ + queryClient.fetchQuery(videoPlaylistQueries.detail(playlistId)), + queryClient.fetchQuery( + learningResourceQueries.items(playlistId, { + learning_resource_id: playlistId, + }), + ), + ]) + + const firstVideoImage = items?.[0]?.image?.url + const firstVideoImageAlt = items?.[0]?.image?.alt ?? undefined + + return standardizeMetadata({ + title: playlist.title, + description: playlist.description ?? undefined, + image: firstVideoImage ?? playlist.image?.url, + imageAlt: firstVideoImage + ? firstVideoImageAlt + : (playlist.image?.alt ?? undefined), + }) + }) +} const Page: React.FC> = async ({ params, }) => { const { id } = await params const playlistId = Number(id) - if (Number.isNaN(playlistId)) { + if (!Number.isInteger(playlistId) || playlistId <= 0) { notFound() } const queryClient = getQueryClient() diff --git a/frontends/main/src/app/video-playlist/detail/[id]/page.tsx b/frontends/main/src/app/video-playlist/detail/[id]/page.tsx index f9825bfa7d..2d608c0f6d 100644 --- a/frontends/main/src/app/video-playlist/detail/[id]/page.tsx +++ b/frontends/main/src/app/video-playlist/detail/[id]/page.tsx @@ -1,7 +1,6 @@ import React from "react" -import type { Metadata } from "next" import { HydrationBoundary, dehydrate } from "@tanstack/react-query" -import { standardizeMetadata } from "@/common/metadata" +import { safeGenerateMetadata, standardizeMetadata } from "@/common/metadata" import { learningResourceQueries, videoPlaylistQueries, @@ -10,9 +9,28 @@ import { getQueryClient } from "@/app/getQueryClient" import VideoDetailPageRouter from "@/app-pages/VideoPlaylistCollectionPage/VideoDetailPageRouter" import { notFound } from "next/navigation" -export const metadata: Metadata = standardizeMetadata({ - title: "Video Detail", -}) +export const generateMetadata = async ( + props: PageProps<"/video-playlist/detail/[id]">, +) => { + const { id } = await props.params + const videoId = Number(id) + if (!Number.isInteger(videoId) || videoId <= 0) { + notFound() + } + const queryClient = getQueryClient() + + return safeGenerateMetadata(async () => { + const resource = await queryClient.fetchQuery( + learningResourceQueries.detail(videoId), + ) + return standardizeMetadata({ + title: resource.title, + description: resource.description ?? undefined, + image: resource.image?.url, + imageAlt: resource.image?.alt ?? undefined, + }) + }) +} const Page: React.FC> = async ({ params, From 469e2732632c58f289e7173aeecef02a1320b2c0 Mon Sep 17 00:00:00 2001 From: Anastasia Beglova Date: Fri, 8 May 2026 09:38:30 -0400 Subject: [PATCH 4/5] refactor ocw video etl (#3308) --- .../FeaturedVideo.tsx | 3 +- .../VideoPlaylistCollectionPage/VideoCard.tsx | 4 +- .../VideoDetailPage.tsx | 12 +- learning_resources/constants.py | 26 +- learning_resources/etl/loaders.py | 180 +++++++--- learning_resources/etl/loaders_test.py | 325 ++++++++++++------ learning_resources/etl/pipelines.py | 5 +- learning_resources/tasks.py | 8 +- learning_resources_search/api.py | 3 +- learning_resources_search/api_test.py | 1 + main/settings.py | 6 - 11 files changed, 368 insertions(+), 205 deletions(-) diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx index 941e85b920..7e2f62a318 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/FeaturedVideo.tsx @@ -184,8 +184,7 @@ const FeaturedVideo: React.FC = ({ totalVideos, totalTime, }) => { - const imageUrl = - video.image?.url ?? video.content_files?.[0]?.image_src ?? null + const imageUrl = video.image?.url ?? null const duration = video.video?.duration ? formatDurationClockTime(video.video.duration) diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx index 8900d13d90..6e60a03ce7 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoCard.tsx @@ -143,9 +143,7 @@ type VideoCardProps = { const VideoCard: React.FC = ({ resource, href }) => { const [imgError, setImgError] = useState(false) const imageUrl = !imgError - ? (resource?.image?.url ?? - resource.content_files?.[0]?.image_src ?? - PLACEHOLDER_IMG) + ? (resource?.image?.url ?? PLACEHOLDER_IMG) : PLACEHOLDER_IMG const description = resource.description ?? "" const duration = resource.video?.duration diff --git a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx index 784dd24059..353a7f0f97 100644 --- a/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx +++ b/frontends/main/src/app-pages/VideoPlaylistCollectionPage/VideoDetailPage.tsx @@ -571,7 +571,6 @@ const VideoDetailPage: React.FC = ({ sources[0]?.type === "video/youtube" ? undefined : (video?.video?.cover_image_url ?? - video?.content_files?.[0]?.image_src ?? video?.image?.url ?? undefined) } @@ -581,12 +580,10 @@ const VideoDetailPage: React.FC = ({ ariaLabel={`Video: ${videoTitleLabel}`} ariaDescribedBy="video-description" /> - ) : video?.image?.url || video?.content_files?.[0]?.image_src ? ( + ) : video?.image?.url ? ( {videoThumbnailAlt} = ({ const itemDuration = item.video?.duration ? formatDurationClockTime(item.video.duration) : null - const imageUrl = - item.image?.url ?? - item.content_files?.[0]?.image_src ?? - null + const imageUrl = item.image?.url ?? null const itemTopicNames = (item.topics ?? []) .map((topic) => topic.name) .filter(Boolean) diff --git a/learning_resources/constants.py b/learning_resources/constants.py index 098085d2dc..5eb63b3bb1 100644 --- a/learning_resources/constants.py +++ b/learning_resources/constants.py @@ -130,27 +130,16 @@ class LearningResourceRelationTypes(TextChoices): OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT = "Practice & Assignment" -OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS = "Open Textbooks" -OCW_CONTENT_CATEGORY_LECTURE_VIDEOS = "Lecture Videos" -OCW_CONTENT_CATEGORY_TEACHING_RESOURCES = "Teaching Resources" - -VALID_COURSE_CONTENT_CATEGORY_CHOICES = ( - ( - OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, - OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, - ), - (OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS, OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS), - (OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, OCW_CONTENT_CATEGORY_LECTURE_VIDEOS), - (OCW_CONTENT_CATEGORY_TEACHING_RESOURCES, OCW_CONTENT_CATEGORY_TEACHING_RESOURCES), -) +OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS = "Open Textbook" +OCW_CONTENT_CATEGORY_LECTURE_VIDEOS = "Lecture Video" -VIDEO_CONTENT_CATEGORIES = [ - OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, - OCW_CONTENT_CATEGORY_TEACHING_RESOURCES, -] +OCW_INSTRUCTOR_INSIGHTS_TAG = "Instructor Insights" VIDEO_SHORT_RESOURCE_CATEGORY = "Video Short" +OCW_PLAYLIST_VIDEO_THRESHOLD = 0.6 + + OCW_COURSE_CONTENT_CATEGORY_MAPPING = { "Exams": OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, "Exams Solutions": OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, @@ -166,9 +155,6 @@ class LearningResourceRelationTypes(TextChoices): "Activity Assignments": OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, "Written Assignments": OCW_CONTENT_CATEGORY_PRACTICE_AND_ASSIGNMENT, OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS: OCW_CONTENT_CATEGORY_OPEN_TEXTBOOKS, - "Lecture Videos": OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, - "Problem-solving Videos": OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, - "Instructor Insights": OCW_CONTENT_CATEGORY_TEACHING_RESOURCES, } diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 062e28c52f..5a1bf1b809 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -14,7 +14,8 @@ CONTENT_TYPE_PAGE, OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, OCW_COURSE_CONTENT_CATEGORY_MAPPING, - VIDEO_CONTENT_CATEGORIES, + OCW_INSTRUCTOR_INSIGHTS_TAG, + OCW_PLAYLIST_VIDEO_THRESHOLD, LearningResourceDelivery, LearningResourceRelationTypes, LearningResourceType, @@ -1032,12 +1033,10 @@ def load_learning_materials( """ material_ids = [] - if settings.CREATE_OCW_LEARNING_MATERIALS: - promoted_ocw_file_types = set(OCW_COURSE_CONTENT_CATEGORY_MAPPING.keys()) - elif settings.SHOW_OCW_LECTURE_VIDEOS: - promoted_ocw_file_types = {OCW_CONTENT_CATEGORY_LECTURE_VIDEOS} - else: - promoted_ocw_file_types = set() + if not settings.CREATE_OCW_LEARNING_MATERIALS: + return + + promoted_ocw_file_types = set(OCW_COURSE_CONTENT_CATEGORY_MAPPING.keys()) for content_file_id in content_file_ids: content_file = ContentFile.objects.get(id=content_file_id) @@ -1050,6 +1049,15 @@ def load_learning_materials( material_ids.append( load_learning_material(course_run, content_file, learning_material_tags) ) + + # OCW lecture videos combine youtube and ocw data and are + # managed through a separate process + elif ( + content_file.direct_learning_resource + and content_file.direct_learning_resource.resource_type + == LearningResourceType.video.name + ): + material_ids.append(content_file.direct_learning_resource.id) elif content_file.direct_learning_resource: direct_resource = content_file.direct_learning_resource direct_resource.published = False @@ -1082,10 +1090,7 @@ def load_learning_material( resource_category = resource_categories[0] with transaction.atomic(): - if resource_category in VIDEO_CONTENT_CATEGORIES: - resource_type = LearningResourceType.video.name - else: - resource_type = LearningResourceType.document.name + resource_type = LearningResourceType.document.name learning_resource, created = LearningResource.objects.update_or_create( readable_id=f"{course_run.run_id}-{content_file.key}", platform=course_run.learning_resource.platform, @@ -1103,6 +1108,9 @@ def load_learning_material( learning_resource, learning_material_tags, is_content_file=False ) + if content_file.image_src: + load_image(learning_resource, {"url": content_file.image_src}) + content_file.direct_learning_resource = learning_resource content_file.save() @@ -1367,6 +1375,76 @@ def load_video(video_data: dict) -> LearningResource: return learning_resource +def load_video_with_content_file( + youtube_video_data: dict, content_file: ContentFile +) -> LearningResource: + """ + Load a video into the database + + Args: + youtube_video_data (dict): the video data + content_file (ContentFile): the content file that matches the youtube id + + Returns: + LearningResource: the created or updated video resource + """ + + video_data = youtube_video_data.copy() + readable_id = video_data.pop("readable_id") + video_fields = video_data.pop("video", {}) + image_data = video_data.pop("image", None) + video_data["resource_category"] = LearningResourceType.video.value + video_data.pop("youtube_id", None) + video_data.pop("offered_by", None) + video_data.pop("platform", None) + + # combine youtube data with content file data. + video_data["url"] = content_file.url + video_data["title"] = content_file.title + video_data["description"] = content_file.description + + if content_file.content_tags.filter(name=OCW_INSTRUCTOR_INSIGHTS_TAG).exists(): + video_data["resource_category"] = LearningResourceType.video.value + else: + video_data["resource_category"] = OCW_CONTENT_CATEGORY_LECTURE_VIDEOS + + with transaction.atomic(): + ( + learning_resource, + created, + ) = LearningResource.objects.update_or_create( + readable_id=readable_id, + resource_type=LearningResourceType.video.name, + etl_source=video_data["etl_source"], + defaults=video_data, + ) + Video.objects.update_or_create( + learning_resource=learning_resource, defaults=video_fields + ) + load_image(learning_resource, image_data) + parent_resource = content_file.run.learning_resource + learning_resource.topics.set(parent_resource.topics.all()) + learning_resource.offered_by = parent_resource.offered_by + learning_resource.platform = parent_resource.platform + learning_resource.save() + + content_file.direct_learning_resource = learning_resource + content_file.save() + + parent_resource.resources.add( + learning_resource, + through_defaults={ + "relation_type": ( + LearningResourceRelationTypes.COURSE_LEARNING_MATERIALS + ), + }, + ) + + update_index(learning_resource, created) + + return learning_resource + + def load_videos(videos_data: iter) -> list[LearningResource]: """ Load a list of videos into the database @@ -1612,14 +1690,9 @@ def load_playlist( if create_videos: video_resources = load_videos(videos_data) else: - video_resources = find_existing_videos(videos_data) + video_resources = load_videos_from_content_files(videos_data) if video_resources is None: - log.info( - "Skipping playlist %s: not all videos have matching video. ", - playlist_id, - ) - existing_resource = LearningResource.objects.filter( readable_id=playlist_id, resource_type=LearningResourceType.video_playlist.name, @@ -1630,6 +1703,16 @@ def load_playlist( existing_resource.save() update_index(existing_resource, newly_created=False) return None + else: + # Update the search index for parent courses which now + # have a relationship to the newly created videos + parent_ids = set( + ContentFile.objects.filter( + direct_learning_resource__in=video_resources + ).values_list("run__learning_resource_id", flat=True) + ) + for parent in LearningResource.objects.filter(id__in=parent_ids): + update_index(parent, newly_created=False) playlist_data["resource_category"] = LearningResourceType.video_playlist.value with transaction.atomic(): @@ -1655,11 +1738,8 @@ def load_playlist( load_topics(playlist_resource, most_common_topics(video_resources)) - # Unpublish any videos from youtube that are no longer in the playlist - # OCW content file videos are removed from the playlist but not unpublished unpublished_videos = playlist_resource.resources.filter( resource_type=LearningResourceType.video.name, - platform=PlatformType.youtube.name, published=True, ).exclude(id__in=[video.id for video in video_resources]) unpublished_video_ids = list(unpublished_videos.values_list("id", flat=True)) @@ -1683,39 +1763,53 @@ def load_playlist( return playlist_resource -def find_existing_videos(videos_data: list[dict]) -> list[LearningResource] | None: +def load_videos_from_content_files( + youtube_videos_data: Iterable[dict], +) -> list[LearningResource] | None: """ - Find existing video resources matching the given video data. + Find existing video content files matching the given video data. + + Checks how many videos have matching ContentFile objects by youtube_id. + If at least 60% of the videos have matches, loads those videos. + Otherwise returns None. Args: - videos_data (list of dict): list of video data dicts + youtube_videos_data (Iterable of dict): video data dicts from youtube Returns: list of LearningResource | None: list of existing video resources, - or None if any video is not found + or None if fewer than OCW_PLAYLIST_VIDEO_THRESHOLD + of videos have matching content files """ - videos = [] - for video_data in videos_data: + + matched = [] + total = 0 + for video_data in youtube_videos_data: + total += 1 youtube_id = video_data.get("youtube_id") if not youtube_id: - return None - learning_resource = ( - ContentFile.objects.filter( - youtube_id=youtube_id, - direct_learning_resource__resource_type=LearningResourceType.video.name, - direct_learning_resource__published=True, - ) - .select_related("direct_learning_resource") - .first() + continue + content_file = ContentFile.objects.filter( + youtube_id=youtube_id, + ).first() + if content_file: + matched.append((video_data, content_file)) + + threshold = total * OCW_PLAYLIST_VIDEO_THRESHOLD + if len(matched) < threshold: + log.info( + "Only %d/%d videos have matching content files (threshold: %.2f). " + "Skipping playlist.", + len(matched), + total, + OCW_PLAYLIST_VIDEO_THRESHOLD, ) - if learning_resource: - videos.append(learning_resource.direct_learning_resource) - else: - log.info( - "No existing published video resource found for youtube_id=%s", - youtube_id, - ) - return None + return None + + videos = [] + for video_data, content_file in matched: + video = load_video_with_content_file(video_data, content_file) + videos.append(video) return videos diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index c32198d4b4..5e9c08bc52 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -15,6 +15,7 @@ CONTENT_TYPE_FILE, CONTENT_TYPE_PAGE, CURRENCY_USD, + OCW_CONTENT_CATEGORY_LECTURE_VIDEOS, Availability, LearningResourceDelivery, LearningResourceRelationTypes, @@ -34,7 +35,6 @@ from learning_resources.etl.loaders import ( ProgramLoadResult, calculate_completeness, - find_existing_videos, load_content_file, load_content_files, load_course, @@ -58,7 +58,9 @@ load_topics, load_video, load_video_channels, + load_video_with_content_file, load_videos, + load_videos_from_content_files, ) from learning_resources.etl.mitxonline import transform_programs from learning_resources.etl.utils import get_s3_prefix_for_source @@ -2284,70 +2286,25 @@ def test_load_playlist_removed_videos_unpublished( load_playlist(channel, props) - # YouTube video should be unpublished + # Both OCW and YouTube videos should be unpublished youtube_video.refresh_from_db() assert youtube_video.published is False - # OCW video should NOT be unpublished ocw_video.refresh_from_db() - assert ocw_video.published is True + assert ocw_video.published is False # bulk_resources_unpublished_actions called only with the youtube video mock_bulk_unpublish.assert_called_once_with( - [youtube_video.id], + [youtube_video.id, ocw_video.id], LearningResourceType.video.name, ) -@pytest.mark.parametrize("all_videos_exist", [True, False]) -def test_find_existing_videos(all_videos_exist): - """Test that find_existing_videos returns matching video resources or None""" - video_resource_1 = VideoFactory.create().learning_resource - video_resource_2 = VideoFactory.create().learning_resource - run = LearningResourceRunFactory.create() - - # Create content files with youtube_id linked to video resources - ContentFileFactory.create( - run=run, - youtube_id="yt_id_1", - direct_learning_resource=video_resource_1, - ) - ContentFileFactory.create( - run=run, - youtube_id="yt_id_2", - direct_learning_resource=video_resource_2, - ) - - if all_videos_exist: - videos_data = [ - {"youtube_id": "yt_id_1"}, - {"youtube_id": "yt_id_2"}, - ] - result = find_existing_videos(videos_data) - assert result is not None - assert len(result) == 2 - assert result[0].id == video_resource_1.id - assert result[1].id == video_resource_2.id - else: - videos_data = [ - {"youtube_id": "yt_id_1"}, - {"youtube_id": "missing_yt_id"}, - ] - result = find_existing_videos(videos_data) - assert result is None - - -def test_find_existing_videos_empty_list(): - """Test that find_existing_videos returns an empty list for empty input""" - result = find_existing_videos([]) - assert result == [] - - @pytest.mark.parametrize("all_videos_exist", [True, False]) def test_load_playlist_create_videos_false( mocker, all_videos_exist, mock_get_similar_topics_qdrant ): - """Test load_playlist with create_videos=False uses find_existing_videos""" + """Test load_playlist with create_videos=False uses load_videos_from_content_files""" expected_topics = [{"name": "Biology"}, {"name": "Physics"}] [ LearningResourceTopicFactory.create(name=topic["name"]) @@ -2365,17 +2322,19 @@ def test_load_playlist_create_videos_false( ) channel = VideoChannelFactory.create() - run = LearningResourceRunFactory.create() if all_videos_exist: video_resource = VideoFactory.create().learning_resource - ContentFileFactory.create( - run=run, - youtube_id="yt_existing", - direct_learning_resource=video_resource, + mock_load_from_cf = mocker.patch( + "learning_resources.etl.loaders.load_videos_from_content_files", + return_value=[video_resource], ) videos_data = [{"youtube_id": "yt_existing"}] else: + mock_load_from_cf = mocker.patch( + "learning_resources.etl.loaders.load_videos_from_content_files", + return_value=None, + ) videos_data = [{"youtube_id": "yt_missing"}] playlist = VideoPlaylistFactory.build().learning_resource @@ -2397,17 +2356,150 @@ def test_load_playlist_create_videos_false( result = load_playlist(channel, props) + mock_load_from_cf.assert_called_once() + if all_videos_exist: assert isinstance(result, LearningResource) assert result.resources.count() == 1 assert result.video_playlist.channel == channel else: assert result is None - # update_index should not be called for the playlist - # (only possibly for unpublishing an existing one) mock_update_index.assert_not_called() +def test_load_video_with_content_file(mocker): + """Test that load_video_with_content_file creates a video resource + combining youtube data with OCW content file data + """ + mock_update_index = mocker.patch( + "learning_resources.etl.loaders.update_index", + ) + + ocw_platform = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) + offeror = LearningResourceOfferorFactory.create(is_ocw=True) + topic = LearningResourceTopicFactory.create(name="Physics") + parent_course = LearningResourceFactory.create( + is_course=True, + platform=ocw_platform, + offered_by=offeror, + etl_source=ETLSource.ocw.name, + ) + parent_course.topics.set([topic]) + run = LearningResourceRunFactory.create(learning_resource=parent_course) + + content_file = ContentFileFactory.create( + run=run, + title="OCW Lecture 1", + url="https://ocw.mit.edu/lecture1", + description="OCW lecture description", + youtube_id="yt_abc123", + ) + + youtube_video_data = { + "readable_id": "yt_abc123", + "platform": PlatformType.youtube.name, + "etl_source": ETLSource.youtube.name, + "title": "YouTube Title (should be overridden)", + "description": "YouTube description (should be overridden)", + "url": "https://youtube.com/watch?v=yt_abc123", + "published": True, + "video": {"duration": "PT30M"}, + "image": {"url": "https://i.ytimg.com/vi/yt_abc123/hqdefault.jpg"}, + "youtube_id": "yt_abc123", + "offered_by": {"code": "mit"}, + } + + result = load_video_with_content_file(youtube_video_data, content_file) + + assert isinstance(result, LearningResource) + assert result.resource_type == LearningResourceType.video.name + assert result.readable_id == "yt_abc123" + # Title/description/url should come from content file, not youtube + assert result.title == "OCW Lecture 1" + assert result.description == "OCW lecture description" + assert result.url == "https://ocw.mit.edu/lecture1" + assert result.resource_category == OCW_CONTENT_CATEGORY_LECTURE_VIDEOS + # Platform/offered_by/topics should come from parent course + assert result.platform == ocw_platform + assert result.offered_by == offeror + assert list(result.topics.values_list("name", flat=True)) == ["Physics"] + # Video model should be created + assert result.video.duration == "PT30M" + # Content file should be linked + content_file.refresh_from_db() + assert content_file.direct_learning_resource == result + # Parent course should have this as a child + assert parent_course.children.filter( + child=result, + relation_type=LearningResourceRelationTypes.COURSE_LEARNING_MATERIALS, + ).exists() + assert mock_update_index.call_count == 1 + + +@pytest.mark.parametrize( + ("match_ratio", "expected_result"), + [ + ("all_match", "returns_videos"), + ("three_quarter_match", "returns_videos"), + ("below_threshold", "returns_none"), + ], +) +def test_load_videos_from_content_files(mocker, match_ratio, expected_result): + """Test that load_videos_from_content_files respects the 3/4 threshold""" + mock_load = mocker.patch( + "learning_resources.etl.loaders.load_video_with_content_file", + side_effect=lambda _data, _cf: LearningResourceFactory.create( + resource_type=LearningResourceType.video.name, + ), + ) + + run = LearningResourceRunFactory.create() + + # Create 4 content files with youtube_ids + for i in range(4): + ContentFileFactory.create( + run=run, + youtube_id=f"yt_id_{i}", + ) + + if match_ratio == "all_match": + # All 4 match + videos_data = [{"youtube_id": f"yt_id_{i}"} for i in range(4)] + elif match_ratio == "three_quarter_match": + # 3 out of 4 match (exactly 75%) + videos_data = [ + {"youtube_id": "yt_id_0"}, + {"youtube_id": "yt_id_1"}, + {"youtube_id": "yt_id_2"}, + {"youtube_id": "yt_missing"}, + ] + else: + # 2 out of 4 match (50%, below threshold) + videos_data = [ + {"youtube_id": "yt_id_0"}, + {"youtube_id": "yt_id_1"}, + {"youtube_id": "yt_missing_1"}, + {"youtube_id": "yt_missing_2"}, + ] + + result = load_videos_from_content_files(iter(videos_data)) + + if expected_result == "returns_videos": + assert result is not None + expected_count = 4 if match_ratio == "all_match" else 3 + assert len(result) == expected_count + assert mock_load.call_count == expected_count + else: + assert result is None + mock_load.assert_not_called() + + +def test_load_videos_from_content_files_empty_input(): + """Test that an empty iterable returns an empty list""" + result = load_videos_from_content_files(iter([])) + assert result == [] + + def test_load_playlists_unpublish(mocker): """Test load_playlists when a video/playlist gets unpublished""" mocker.patch("learning_resources_search.tasks.bulk_deindex_learning_resources.si") @@ -3181,24 +3273,14 @@ def test_load_documents(mocker, climate_platform, mock_get_similar_topics_qdrant @pytest.mark.django_db -@pytest.mark.parametrize( - ("create_ocw_learning_materials", "show_ocw_lecture_videos"), - [ - (True, False), - (False, True), - (False, False), - ], -) -def test_load_learning_materials( - mocker, settings, create_ocw_learning_materials, show_ocw_lecture_videos -): +@pytest.mark.parametrize("create_ocw_learning_materials", [True, False]) +def test_load_learning_materials(mocker, settings, create_ocw_learning_materials): """ Test that load_learning_materials runs load_learning_material - based on CREATE_OCW_LEARNING_MATERIALS and SHOW_OCW_LECTURE_VIDEOS settings + based on CREATE_OCW_LEARNING_MATERIALS setting """ settings.CREATE_OCW_LEARNING_MATERIALS = create_ocw_learning_materials - settings.SHOW_OCW_LECTURE_VIDEOS = show_ocw_lecture_videos ocw = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) ocw_course = CourseFactory.create( @@ -3209,7 +3291,6 @@ def test_load_learning_materials( relevant_content_tag = LearningResourceContentTagFactory.create( name="Programming Assignments" ) - lecture_video_tag = LearningResourceContentTagFactory.create(name="Lecture Videos") irrelevant_content_tag = LearningResourceContentTagFactory.create(name="Syllabus") no_longer_relevant_resource = LearningResourceFactory.create() @@ -3220,12 +3301,6 @@ def test_load_learning_materials( content_type=CONTENT_TYPE_FILE, ) - lecture_video_content_file = ContentFileFactory.create( - run=ocw_course.learning_resource.runs.first(), - content_tags=[lecture_video_tag], - content_type=CONTENT_TYPE_FILE, - ) - other_content_file = ContentFileFactory.create( run=ocw_course.learning_resource.runs.first(), content_tags=[irrelevant_content_tag], @@ -3241,65 +3316,41 @@ def test_load_learning_materials( course_run=ocw_course.learning_resource.runs.first(), content_file_ids=[ learning_material_content_file.id, - lecture_video_content_file.id, other_content_file.id, ], ) if create_ocw_learning_materials: - # Both programming assignments and lecture videos are promoted - assert load_learning_materials_spy.call_count == 2 + # Programming assignments are promoted + assert load_learning_materials_spy.call_count == 1 load_learning_materials_spy.assert_any_call( ocw_course.learning_resource.runs.first(), learning_material_content_file, {"Programming Assignments"}, ) - load_learning_materials_spy.assert_any_call( - ocw_course.learning_resource.runs.first(), - lecture_video_content_file, - {"Lecture Videos"}, - ) - resource_relationships = ocw_course.learning_resource.children.all() - assert resource_relationships.count() == 2 - # 2 load_learning_material calls (each calls update_index) - # + 1 for unpublishing no_longer_relevant_resource - assert mock_index.call_count == 3 - elif show_ocw_lecture_videos: - # Only lecture videos are promoted - assert load_learning_materials_spy.call_count == 1 - load_learning_materials_spy.assert_called_with( - ocw_course.learning_resource.runs.first(), - lecture_video_content_file, - {"Lecture Videos"}, - ) resource_relationships = ocw_course.learning_resource.children.all() assert resource_relationships.count() == 1 - lecture_video_content_file.refresh_from_db() - assert ( - resource_relationships.first().child.id - == lecture_video_content_file.direct_learning_resource.id - ) - # 1 load_learning_material call + 1 for unpublishing no_longer_relevant_resource + # 1 load_learning_material call (calls update_index) + # + 1 for unpublishing no_longer_relevant_resource assert mock_index.call_count == 2 + + no_longer_relevant_resource.refresh_from_db() + assert no_longer_relevant_resource.published is False else: # Nothing is promoted assert load_learning_materials_spy.call_count == 0 resource_relationships = ocw_course.learning_resource.children.all() assert resource_relationships.count() == 0 - # 1 for unpublishing no_longer_relevant_resource - assert mock_index.call_count == 1 - - no_longer_relevant_resource.refresh_from_db() - assert no_longer_relevant_resource.published is False @pytest.mark.django_db -def test_load_learning_materials_demotes_page_content_files(mocker): +def test_load_learning_materials_demotes_page_content_files(mocker, settings): """ Page content files should not be promoted to learning resources. If a page content file was previously promoted (has direct_learning_resource), load_learning_materials should unpublish that resource and clear the link. """ + settings.CREATE_OCW_LEARNING_MATERIALS = True ocw = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) ocw_course = CourseFactory.create( platform=ocw.code, @@ -3338,6 +3389,60 @@ def test_load_learning_materials_demotes_page_content_files(mocker): assert ocw_course.learning_resource.children.count() == 0 +@pytest.mark.django_db +def test_load_learning_materials_preserves_videos(mocker, settings): + """ + Content files with a direct_learning_resource whose resource_type is + video should be preserved as-is (not re-promoted or unpublished). + Their resource id should appear in the final material_ids list so + the course keeps them as children. + """ + settings.CREATE_OCW_LEARNING_MATERIALS = True + ocw = LearningResourcePlatformFactory.create(code=PlatformType.ocw.name) + ocw_course = CourseFactory.create( + platform=ocw.code, + learning_resource__is_course=True, + ) + run = ocw_course.learning_resource.runs.first() + + # A video resource created by YouTube ETL + video_resource = LearningResourceFactory.create( + resource_type=LearningResourceType.video.name, + published=True, + ) + + # Content file linked to the video resource + video_content_file = ContentFileFactory.create( + run=run, + direct_learning_resource=video_resource, + content_type=CONTENT_TYPE_FILE, + ) + + mock_index = mocker.patch("learning_resources.etl.loaders.update_index") + + loaders.load_learning_materials( + course_run=run, + content_file_ids=[video_content_file.id], + ) + + # The video resource should still be published and unchanged + video_resource.refresh_from_db() + assert video_resource.published is True + + # The content file should still be linked to the video resource + video_content_file.refresh_from_db() + assert video_content_file.direct_learning_resource == video_resource + + # The course should have the video as a child + assert ocw_course.learning_resource.children.filter( + child=video_resource, + relation_type=LearningResourceRelationTypes.COURSE_LEARNING_MATERIALS, + ).exists() + + # update_index should not have been called (no changes needed) + mock_index.assert_not_called() + + @pytest.mark.django_db @pytest.mark.parametrize("learning_material_exists", [True, False]) def test_load_learning_material(mocker, learning_material_exists): diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py index 5028ca72e7..78cadb436a 100644 --- a/learning_resources/etl/pipelines.py +++ b/learning_resources/etl/pipelines.py @@ -160,10 +160,7 @@ def ocw_courses_etl( calc_completeness=True, ) - if ( - settings.CREATE_OCW_LEARNING_MATERIALS - or settings.SHOW_OCW_LECTURE_VIDEOS - ): + if settings.CREATE_OCW_LEARNING_MATERIALS: loaders.load_learning_materials(course_run, content_file_ids) else: log.info("No course data found for %s", url_path) diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py index 3063ad5bf4..cdd9779f44 100644 --- a/learning_resources/tasks.py +++ b/learning_resources/tasks.py @@ -402,14 +402,10 @@ def update_ocw_learning_material_resources(self): # noqa: ARG001 ocw courses or content files """ - if ( - not settings.CREATE_OCW_LEARNING_MATERIALS - and not settings.SHOW_OCW_LECTURE_VIDEOS - ): + if not settings.CREATE_OCW_LEARNING_MATERIALS: message = ( "update_ocw_learning_material_resources cannot run because " - "CREATE_OCW_LEARNING_MATERIALS and SHOW_OCW_LECTURE_VIDEOS " - "flags are set to False." + "CREATE_OCW_LEARNING_MATERIALS flag is set to False." ) raise RuntimeError(message) diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index 100099f532..6de8bc032c 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -494,9 +494,8 @@ def generate_filter_clauses(search_params): } }, {"term": {"resource_type": "course"}}, + {"term": {"resource_type": "video"}}, ] - if settings.SHOW_OCW_LECTURE_VIDEOS: - ocw_should.append({"term": {"resource_category": "Lecture Videos"}}) ocw_clause = { "bool": { "should": ocw_should, diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index 68b129ebe5..b17308be42 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -2475,6 +2475,7 @@ def test_execute_learn_search_for_learning_resource_query_filter_ocw_files(opens } }, {"term": {"resource_type": "course"}}, + {"term": {"resource_type": "video"}}, ], "minimum_should_match": 1, } diff --git a/main/settings.py b/main/settings.py index ee45d02bce..c9b9c7a790 100644 --- a/main/settings.py +++ b/main/settings.py @@ -938,9 +938,3 @@ def get_all_config_keys(): # Create all learning material resources for OCW courses # Learning material resources are behind show_ocw_files flag in search CREATE_OCW_LEARNING_MATERIALS = get_bool("CREATE_OCW_LEARNING_MATERIALS", default=False) -# Create just lecture video resources for OCW courses. -# OCW lecture video resources will be displayed in search -# regardless of show_ocw_files flag -# If the video config is set to `create_videos`=False ocw videos will be added to -# playlists -SHOW_OCW_LECTURE_VIDEOS = get_bool("SHOW_OCW_LECTURE_VIDEOS", default=False) From 289bdce1e913c8f4d169c9605e2ac285d3039b36 Mon Sep 17 00:00:00 2001 From: Doof Date: Fri, 8 May 2026 14:13:42 +0000 Subject: [PATCH 5/5] Release 0.66.12 --- RELEASE.rst | 8 ++++++++ main/settings.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index cdd32f7d60..5693d1d0da 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -1,6 +1,14 @@ Release Notes ============= +Version 0.66.12 +--------------- + +- refactor ocw video etl (#3308) +- fix: update metadata for video pages (#3310) +- Both ETL and webhook should populate OVS resource_category when applicable. (#3313) +- feat: Welcome to learn email on new account setup (#3189) + Version 0.66.10 (Released May 07, 2026) --------------- diff --git a/main/settings.py b/main/settings.py index c9b9c7a790..f9d14bbb99 100644 --- a/main/settings.py +++ b/main/settings.py @@ -35,7 +35,7 @@ from main.settings_pluggy import * # noqa: F403 from openapi.settings_spectacular import open_spectacular_settings -VERSION = "0.66.10" +VERSION = "0.66.12" log = logging.getLogger()