diff --git a/apps/cms/AGENTS.md b/apps/cms/AGENTS.md index 749beb2b..d6607473 100644 --- a/apps/cms/AGENTS.md +++ b/apps/cms/AGENTS.md @@ -119,6 +119,16 @@ Scope: `apps/cms`. | `easter-dates` | `easter-dates.json` | No | blocks | | `related-questions` | `related-questions.json` | Yes (`related-question-item`) | blocks, Section DZ | +--- + +## Local Testing: Gateway Sync + +Full runbook: [`docs/solutions/cms/gateway-sync-local-testing.md`](../../docs/solutions/cms/gateway-sync-local-testing.md) + +Covers: env setup, admin creation, API token generation (not admin JWT — see the auth gotcha), dry-run, live import, status polling, and guard verification. + +--- + ### Seed script conventions (`scripts/seed-easter.mjs`) - Top-level blocks use `__typename: "ComponentSections"` (GraphQL format) diff --git a/apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts b/apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts index 6024bce3..12104353 100644 --- a/apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts +++ b/apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts @@ -2,19 +2,45 @@ import type { Core } from "@strapi/strapi" import { runSync, resolveScope, getSyncStatus } from "../services/gateway-sync" import { formatError } from "../services/strapi-helpers" +type TriggerBody = { + scope?: string | string[] + collectionIds?: string[] + videoIds?: string[] + dryRun?: boolean +} + type StrapiContext = { - request: { body?: { scope?: string | string[] } } + request: { body?: TriggerBody } status: number body: unknown } export default ({ strapi }: { strapi: Core.Strapi }) => ({ async trigger(ctx: StrapiContext) { - const scope = ctx.request.body?.scope + const { scope, collectionIds, videoIds, dryRun } = ctx.request.body ?? 
{} const phases = resolveScope(scope) + const options = { scope, collectionIds, videoIds, dryRun } + const isLimited = + (collectionIds && collectionIds.length > 0) || + (videoIds && videoIds.length > 0) + + // Dry-run requests are synchronous — return the resolved selection + if (dryRun && isLimited) { + try { + const result = await runSync(strapi, options) + ctx.status = 200 + ctx.body = result + } catch (error) { + strapi.log.error(`[gateway-sync] Dry-run failed: ${formatError(error)}`) + ctx.status = 500 + ctx.body = { error: formatError(error) } + } + return + } + // Fire and forget — sync runs in background - runSync(strapi, scope).catch((error) => { + runSync(strapi, options).catch((error) => { strapi.log.error( `[gateway-sync] Background sync failed: ${formatError(error)}`, ) @@ -22,8 +48,11 @@ export default ({ strapi }: { strapi: Core.Strapi }) => ({ ctx.status = 202 ctx.body = { - message: `Gateway sync started`, + message: isLimited + ? "Gateway limited seed import started" + : "Gateway sync started", phases, + isLimited: !!isLimited, status: getSyncStatus(), } }, diff --git a/apps/cms/src/api/gateway-sync/routes/gateway-sync.ts b/apps/cms/src/api/gateway-sync/routes/gateway-sync.ts index 98e79406..4b388538 100644 --- a/apps/cms/src/api/gateway-sync/routes/gateway-sync.ts +++ b/apps/cms/src/api/gateway-sync/routes/gateway-sync.ts @@ -1,3 +1,11 @@ +/** + * Gateway sync API routes. + * + * Auth note: these routes use `admin::isAuthenticatedAdmin` under the content-API scope. + * Use a full-access API token (not an admin JWT) — admin JWTs return 401 here. 
+ * + * @see docs/solutions/cms/gateway-sync-local-testing.md — local testing runbook + */ export default { routes: [ { diff --git a/apps/cms/src/api/gateway-sync/services/gateway-sync.ts b/apps/cms/src/api/gateway-sync/services/gateway-sync.ts index ac9bfb73..f8f931e9 100644 --- a/apps/cms/src/api/gateway-sync/services/gateway-sync.ts +++ b/apps/cms/src/api/gateway-sync/services/gateway-sync.ts @@ -1,10 +1,18 @@ import type { Core } from "@strapi/strapi" -import { type SyncStats, formatError } from "./strapi-helpers" +import { + type SyncStats, + formatError, + publishDrafts, +} from "./strapi-helpers" import { syncLanguages } from "./sync-languages" import { syncCountries } from "./sync-countries" import { syncKeywords } from "./sync-keywords" import { syncVideos } from "./sync-videos" import { syncVideoVariants } from "./sync-video-variants" +import { + resolveCollectionVideoIds, + type ResolveCollectionVideoIdsResult, +} from "./resolve-collection-video-ids" export type SyncPhase = | "languages" @@ -32,15 +40,42 @@ type SyncResult = { error?: string } +type PublishStage = { + name: string + contentTypes: string[] +} + +const REPUBLISH_UPDATED_UIDS = new Set([ + "api::video.video", + "api::video-subtitle.video-subtitle", + "api::video-variant.video-variant", + "api::bible-citation.bible-citation", + "api::video-study-question.video-study-question", +]) + +/** Selection context for limited seed imports */ +export type SyncSelection = { + collectionIds: string[] + videoIds: string[] + resolvedVideoIds: string[] + collectionVideoIds: Record + missingCollectionIds: string[] + isFullSync: boolean + dryRun: boolean +} + +/** Maximum total IDs per limited import request */ +const MAX_LIMITED_IDS = 500 + const PHASE_RUNNERS: Record< SyncPhase, - (strapi: Core.Strapi) => Promise + (strapi: Core.Strapi, selection: SyncSelection) => Promise > = { - languages: syncLanguages, - countries: syncCountries, - keywords: syncKeywords, - videos: syncVideos, - "video-variants": 
syncVideoVariants, + languages: (strapi) => syncLanguages(strapi), + countries: (strapi) => syncCountries(strapi), + keywords: (strapi) => syncKeywords(strapi), + videos: (strapi, selection) => syncVideos(strapi, selection), + "video-variants": (strapi, selection) => syncVideoVariants(strapi, selection), } let syncInProgress = false @@ -77,38 +112,210 @@ function logPhase(strapi: Core.Strapi, phase: PhaseResult) { ) } +const PUBLISH_STAGES: PublishStage[] = [ + { + name: "references", + contentTypes: [ + "api::continent.continent", + "api::language.language", + "api::country.country", + "api::country-language.country-language", + "api::keyword.keyword", + "api::bible-book.bible-book", + "api::video-origin.video-origin", + "api::video-edition.video-edition", + "api::mux-video.mux-video", + ], + }, + { + name: "videos", + contentTypes: ["api::video.video"], + }, + { + name: "video-children", + contentTypes: [ + "api::video-subtitle.video-subtitle", + "api::video-variant.video-variant", + "api::bible-citation.bible-citation", + "api::video-study-question.video-study-question", + ], + }, +] + +async function publishStageDrafts( + strapi: Core.Strapi, + stage: PublishStage, +): Promise { + const failures: string[] = [] + + for (const uid of stage.contentTypes) { + const result = await publishDrafts(strapi, uid, { + includeUpdatedDrafts: REPUBLISH_UPDATED_UIDS.has(uid), + }) + + if (result.published > 0) { + strapi.log.info( + `[gateway-sync] Published ${result.published} draft ${uid.split(".")[1]} records`, + ) + } + + if (result.failed > 0) { + failures.push( + `${uid} (${result.failed} failed: ${result.failedDocumentIds.slice(0, 3).join(", ")})`, + ) + } + } + + if (failures.length > 0) { + throw new Error( + `Publish stage ${stage.name} failed for ${failures.join("; ")}`, + ) + } +} + +export type SyncOptions = { + scope?: string | string[] + collectionIds?: string[] + videoIds?: string[] + dryRun?: boolean +} + +function isLimitedImportEnabled(): boolean { + return 
process.env.GATEWAY_SYNC_ENABLE_LIMITED_IMPORT === "true" +} + +export async function buildSelection( + options: SyncOptions, +): Promise { + const collectionIds = options.collectionIds ?? [] + const videoIds = options.videoIds ?? [] + const isFullSync = collectionIds.length === 0 && videoIds.length === 0 + + if (isFullSync) { + return { + collectionIds: [], + videoIds: [], + resolvedVideoIds: [], + collectionVideoIds: {}, + missingCollectionIds: [], + isFullSync: true, + dryRun: false, + } + } + + const resolved: ResolveCollectionVideoIdsResult = + collectionIds.length > 0 + ? await resolveCollectionVideoIds({ collectionIds }) + : { + collectionVideoIds: {}, + resolvedVideoIds: [], + missingCollectionIds: [], + } + + // Union resolved collection video IDs with explicit videoIds, deduped + const allVideoIds = new Set([...resolved.resolvedVideoIds, ...videoIds]) + + return { + collectionIds, + videoIds, + resolvedVideoIds: [...allVideoIds], + collectionVideoIds: resolved.collectionVideoIds, + missingCollectionIds: resolved.missingCollectionIds, + isFullSync: false, + dryRun: options.dryRun ?? false, + } +} + export async function runSync( strapi: Core.Strapi, - scope?: string | string[], + options: SyncOptions = {}, ): Promise { if (syncInProgress) { strapi.log.warn("[gateway-sync] Sync already in progress, skipping") return { skipped: true } } - const phasesToRun = resolveScope(scope) + const phasesToRun = resolveScope(options.scope) if (phasesToRun.length === 0) { strapi.log.warn("[gateway-sync] No valid phases in scope, skipping") return { skipped: true } } + // Build selection context + const selection = await buildSelection(options) + + // Reject limited imports if env guard is not enabled + if (!selection.isFullSync && !isLimitedImportEnabled()) { + strapi.log.warn( + "[gateway-sync] Limited import rejected: GATEWAY_SYNC_ENABLE_LIMITED_IMPORT is not enabled", + ) + return { + error: + "Limited imports are disabled. 
Set GATEWAY_SYNC_ENABLE_LIMITED_IMPORT=true to enable.", + } + } + + // Validate total ID count for limited imports + if ( + !selection.isFullSync && + selection.collectionIds.length + (options.videoIds?.length ?? 0) > + MAX_LIMITED_IDS + ) { + return { + error: `Too many IDs in limited import request. Maximum ${MAX_LIMITED_IDS} total collectionIds + videoIds allowed.`, + } + } + + // Dry run: return resolved selection without executing sync + if (selection.dryRun) { + return { + scope: phasesToRun, + duration: 0, + dryRun: { + isFullSync: false, + requestedCollectionIds: selection.collectionIds, + requestedVideoIds: selection.videoIds, + collectionVideoIds: selection.collectionVideoIds, + resolvedVideoIds: selection.resolvedVideoIds, + missingCollectionIds: selection.missingCollectionIds, + phases: phasesToRun, + }, + } as SyncResult & { dryRun: unknown } + } + syncInProgress = true const startTime = Date.now() try { + const mode = selection.isFullSync ? "full" : "limited" strapi.log.info( - `[gateway-sync] ========== Starting sync (${phasesToRun.join(", ")}) ==========`, + `[gateway-sync] ========== Starting ${mode} sync (${phasesToRun.join(", ")}) ==========`, ) + if (!selection.isFullSync) { + strapi.log.info( + `[gateway-sync] Limited import: ${selection.resolvedVideoIds.length} resolved video IDs from ${selection.collectionIds.length} collections + ${selection.videoIds.length} explicit videos`, + ) + if (selection.missingCollectionIds.length > 0) { + strapi.log.warn( + `[gateway-sync] Missing collection IDs (not found in gateway): ${selection.missingCollectionIds.join(", ")}`, + ) + } + } + const phases: PhaseResult[] = [] for (const phase of phasesToRun) { const runner = PHASE_RUNNERS[phase] - const stats = await runner(strapi) + const stats = await runner(strapi, selection) phases.push({ phase, ...stats }) } + for (const stage of PUBLISH_STAGES) { + await publishStageDrafts(strapi, stage) + } + const duration = Date.now() - startTime const result: 
SyncResult = { scope: phasesToRun, duration, phases } @@ -143,12 +350,12 @@ export async function runSync( } export async function runFullSync(strapi: Core.Strapi): Promise { - return runSync(strapi, "all") + return runSync(strapi, { scope: "all" }) } export default { runFullSync: ({ strapi }: { strapi: Core.Strapi }) => runFullSync(strapi), - runSync: ({ strapi }: { strapi: Core.Strapi }, scope?: string | string[]) => - runSync(strapi, scope), + runSync: ({ strapi }: { strapi: Core.Strapi }, options?: SyncOptions) => + runSync(strapi, options), getSyncStatus, } diff --git a/apps/cms/src/api/gateway-sync/services/resolve-collection-video-ids.ts b/apps/cms/src/api/gateway-sync/services/resolve-collection-video-ids.ts new file mode 100644 index 00000000..63cf8700 --- /dev/null +++ b/apps/cms/src/api/gateway-sync/services/resolve-collection-video-ids.ts @@ -0,0 +1,83 @@ +import { gql } from "@apollo/client/core" +import { getGatewayClient } from "./gateway-client" + +export type ResolveCollectionVideoIdsInput = { + collectionIds: string[] +} + +export type ResolveCollectionVideoIdsResult = { + collectionVideoIds: Record + resolvedVideoIds: string[] + missingCollectionIds: string[] +} + +/** + * Query gateway for top-level videos by ID, returning children for expansion. + * + * Collection IDs are coverage-style top-level video IDs (not JourneyCollection IDs). + * Each collection expands to its children's IDs, or to its own ID if it has no children. + */ +const RESOLVE_COLLECTION_VIDEOS_QUERY = gql` + query ResolveCollectionVideoIds($ids: [ID!]!) 
{ + videos(where: { ids: $ids, published: true }, limit: 2000) { + id + label + children { + id + } + } + } +` + +type CollectionVideo = { + id: string + label: string + children: Array<{ id: string }> +} + +export async function resolveCollectionVideoIds( + input: ResolveCollectionVideoIdsInput, +): Promise { + if (input.collectionIds.length === 0) { + return { + collectionVideoIds: {}, + resolvedVideoIds: [], + missingCollectionIds: [], + } + } + + const { data } = await getGatewayClient().query<{ + videos: CollectionVideo[] + }>({ + query: RESOLVE_COLLECTION_VIDEOS_QUERY, + variables: { ids: input.collectionIds }, + }) + + const returnedIds = new Set(data.videos.map((v) => v.id)) + const missingCollectionIds = input.collectionIds.filter( + (id) => !returnedIds.has(id), + ) + + const collectionVideoIds: Record = {} + const allVideoIds = new Set() + + for (const video of data.videos) { + if (video.children.length > 0) { + // Collection with children — include parent + all child IDs + const childIds = video.children.map((c) => c.id) + collectionVideoIds[video.id] = [video.id, ...childIds] + allVideoIds.add(video.id) + for (const id of childIds) allVideoIds.add(id) + } else { + // Leaf video — collection is the video itself + collectionVideoIds[video.id] = [video.id] + allVideoIds.add(video.id) + } + } + + return { + collectionVideoIds, + resolvedVideoIds: [...allVideoIds], + missingCollectionIds, + } +} diff --git a/apps/cms/src/api/gateway-sync/services/strapi-helpers.ts b/apps/cms/src/api/gateway-sync/services/strapi-helpers.ts index fb69253a..60cf4a6a 100644 --- a/apps/cms/src/api/gateway-sync/services/strapi-helpers.ts +++ b/apps/cms/src/api/gateway-sync/services/strapi-helpers.ts @@ -38,6 +38,12 @@ export type SyncStats = { errors: number } +export type PublishDraftsResult = { + published: number + failed: number + failedDocumentIds: string[] +} + export function docs(strapi: Core.Strapi, uid: string): DocumentService { return strapi.documents(uid as never) as 
unknown as DocumentService } @@ -64,6 +70,18 @@ export function clearableRelation( return docId ?? { set: [] } } +/** + * Use when a non-localized entry points at a localized target. + * Strapi publish/re-publish needs the target locale to resolve the + * correct published entity id from a documentId relation. + */ +export function localizedRelation( + docId: string | undefined, + locale = "en", +): { documentId: string; locale: string } | { set: [] } { + return docId ? { documentId: docId, locale } : { set: [] } +} + export async function findByGatewayId( strapi: Core.Strapi, uid: string, @@ -99,17 +117,112 @@ export async function upsertByGatewayId( documentId: existing.documentId, data: { ...data, gatewayId, source: "gateway" }, ...(options?.locale && { locale: options.locale }), - status: "published", }) return { documentId: existing.documentId, action: "updated" } } - const created = await docs(strapi, uid).create({ + // Create as draft only. Strapi v5 entity validator rejects the internal + // publish step of `create({status: "published"})` when manyToOne + // relation targets use documentId strings. The post-sync publishDrafts() + // call promotes all drafts after every phase completes. + const draft = await docs(strapi, uid).create({ data: { ...data, gatewayId, source: "gateway" }, ...(options?.locale && { locale: options.locale }), - status: "published", }) - return { documentId: created.documentId, action: "created" } + return { documentId: draft.documentId, action: "created" } +} + +/** + * Publish all unpublished gateway-sourced documents of a given type. + * Call after a batch of upsertByGatewayId creates to promote drafts. + * + * Strapi v5 entity validator rejects `create({status: "published"})` when + * manyToOne relation targets use documentId strings — the publish step's + * internal re-create fails validation. This helper publishes after the fact. 
+ * + * Uses a direct table query rather than Document Service findMany because + * Document Service `status: "draft"` only returns documents that have been + * published at least once then edited. Brand-new draft-only records are + * invisible to it, and Strapi v5 keeps a draft row after publish, so the + * finder must select documentIds that either have no published row yet or + * have a newer draft row that still needs republishing. + */ +export async function publishDrafts( + strapi: Core.Strapi, + uid: string, + options?: { includeUpdatedDrafts?: boolean }, +): Promise { + const PAGE_SIZE = 500 + const tableName = (strapi as any).getModel(uid).collectionName as string + const knex = (strapi.db as any).connection + const includeUpdatedDrafts = options?.includeUpdatedDrafts ?? false + let published = 0 + let failed = 0 + const failedDocumentIds: string[] = [] + const attemptedDocumentIds = new Set() + + while (true) { + const draftRows = knex({ draft: tableName }) + .select("draft.document_id as document_id") + .max({ draft_updated_at: "draft.updated_at" }) + .where("draft.source", "gateway") + .whereNull("draft.published_at") + .groupBy("draft.document_id") + .as("draft_rows") + + const publishedRows = knex({ published: tableName }) + .select("published.document_id as document_id") + .max({ published_updated_at: "published.updated_at" }) + .whereNotNull("published.published_at") + .groupBy("published.document_id") + .as("published_rows") + + const rows = await knex + .from(draftRows) + .leftJoin( + publishedRows, + "published_rows.document_id", + "draft_rows.document_id", + ) + .select("draft_rows.document_id as documentId") + .where((builder: any) => { + builder.whereNull("published_rows.published_updated_at") + + if (includeUpdatedDrafts) { + builder.orWhere( + "draft_rows.draft_updated_at", + ">", + knex.ref("published_rows.published_updated_at"), + ) + } + }) + .limit(PAGE_SIZE) + + if (rows.length === 0) break + + const uniqueDocIds = rows + .map((r: { 
documentId: string }) => r.documentId) + .filter((documentId: string) => !attemptedDocumentIds.has(documentId)) + + if (uniqueDocIds.length === 0) break + + for (const documentId of uniqueDocIds) { + attemptedDocumentIds.add(documentId) + try { + await docs(strapi, uid).publish({ documentId }) + published++ + } catch (err) { + failed++ + failedDocumentIds.push(documentId) + strapi.log.warn( + `[publishDrafts] ${uid}: publish(${documentId}) failed: ${err instanceof Error ? err.message : String(err)}`, + ) + } + } + + if (rows.length < PAGE_SIZE) break + } + return { published, failed, failedDocumentIds } } /** diff --git a/apps/cms/src/api/gateway-sync/services/sync-countries.ts b/apps/cms/src/api/gateway-sync/services/sync-countries.ts index ab2a50a3..96b48463 100644 --- a/apps/cms/src/api/gateway-sync/services/sync-countries.ts +++ b/apps/cms/src/api/gateway-sync/services/sync-countries.ts @@ -9,6 +9,7 @@ import { upsertByGatewayId, softDeleteUnseen, clearableRelation, + localizedRelation, } from "./strapi-helpers" const COUNTRIES_QUERY = graphql(/* GraphQL */ ` @@ -170,8 +171,8 @@ export async function syncCountries(strapi: Core.Strapi): Promise { primary: cl.primary, suggested: cl.suggested, order: cl.order ?? 
undefined, - language: clearableRelation(langDocId), - country: countryDocId, + language: localizedRelation(langDocId), + country: localizedRelation(countryDocId), }, ) } catch (error) { diff --git a/apps/cms/src/api/gateway-sync/services/sync-keywords.ts b/apps/cms/src/api/gateway-sync/services/sync-keywords.ts index d2549ebc..b946a38e 100644 --- a/apps/cms/src/api/gateway-sync/services/sync-keywords.ts +++ b/apps/cms/src/api/gateway-sync/services/sync-keywords.ts @@ -7,7 +7,7 @@ import { upsertByGatewayId, softDeleteUnseen, buildGatewayIdMap, - clearableRelation, + localizedRelation, } from "./strapi-helpers" const KEYWORDS_QUERY = graphql(/* GraphQL */ ` @@ -32,10 +32,19 @@ export async function syncKeywords(strapi: Core.Strapi): Promise { strapi.log.info("[gateway-sync] Starting keyword sync") - const { data } = await getGatewayClient().query({ query: KEYWORDS_QUERY }) - const keywords = data.keywords + const { data, error } = await getGatewayClient().query({ + query: KEYWORDS_QUERY, + }) - if (keywords.length === 0) { + if (error) { + strapi.log.warn( + `[gateway-sync] Keywords query returned errors: ${error.message}`, + ) + } + + const keywords = data?.keywords + + if (!keywords || keywords.length === 0) { strapi.log.error( "[gateway-sync] Gateway returned 0 keywords — circuit breaker: skipping sync", ) @@ -67,7 +76,7 @@ export async function syncKeywords(strapi: Core.Strapi): Promise { kw.id, { value: kw.value, - language: clearableRelation(langDocId), + language: localizedRelation(langDocId), }, ) diff --git a/apps/cms/src/api/gateway-sync/services/sync-video-variants.ts b/apps/cms/src/api/gateway-sync/services/sync-video-variants.ts index f9a8867c..5d71e1ca 100644 --- a/apps/cms/src/api/gateway-sync/services/sync-video-variants.ts +++ b/apps/cms/src/api/gateway-sync/services/sync-video-variants.ts @@ -2,6 +2,8 @@ import type { Core } from "@strapi/strapi" import type { ResultOf } from "@graphql-typed-document-node/core" import { getGatewayClient } from 
"./gateway-client" import { graphql } from "../gql" +import type { SyncSelection } from "./gateway-sync" +import type { SelectedVideoVariant } from "./sync-videos" import { type SyncStats, formatError, @@ -9,6 +11,7 @@ import { softDeleteUnseen, buildGatewayIdMap, clearableRelation, + localizedRelation, } from "./strapi-helpers" const DEFAULT_PAGE_SIZE = 100 @@ -68,7 +71,16 @@ type GatewayVariant = ResultOf["videoVariants"][number] export async function syncVideoVariants( strapi: Core.Strapi, + selection: SyncSelection, ): Promise { + if (!selection.isFullSync) { + return syncVideoVariantsLimited(strapi, selection) + } + return syncVideoVariantsFull(strapi) +} + +/** Full sync: paginate all variants from gateway, soft-delete unseen */ +async function syncVideoVariantsFull(strapi: Core.Strapi): Promise { const stats: SyncStats = { created: 0, updated: 0, @@ -77,7 +89,7 @@ export async function syncVideoVariants( } const pageSize = getPageSize() - strapi.log.info("[gateway-sync] Starting video variant sync") + strapi.log.info("[gateway-sync] Starting video variant sync (full)") // Get total count from gateway for comparison let gatewayTotal = 0 @@ -215,10 +227,10 @@ export async function syncVideoVariants( downloadable: variant.downloadable, published: variant.published, brightcoveId: variant.brightcoveId ?? undefined, - language: clearableRelation(langDocId), + language: localizedRelation(langDocId), videoEdition: clearableRelation(editionDocId), muxVideo: clearableRelation(muxDocId), - video: { connect: [videoDocId] }, + video: localizedRelation(videoDocId), downloads, }, ) @@ -261,3 +273,153 @@ export async function syncVideoVariants( return stats } + +/** + * Limited sync: consume variants collected during the video sync phase. + * No separate gateway crawl needed — variants come from the selected-video query. + * No soft-delete — limited imports are always additive. 
+ */ +async function syncVideoVariantsLimited( + strapi: Core.Strapi, + selection: SyncSelection, +): Promise { + const stats: SyncStats = { + created: 0, + updated: 0, + softDeleted: 0, + errors: 0, + } + + // Retrieve collected variants from the video sync phase + const collected = ( + selection as SyncSelection & { + _collectedVariants?: Array<{ + variant: SelectedVideoVariant + videoGatewayId: string + }> + } + )._collectedVariants + + if (!collected || collected.length === 0) { + strapi.log.info( + "[gateway-sync] No variants collected from selected videos — skipping variant sync", + ) + return stats + } + + strapi.log.info( + `[gateway-sync] Starting limited variant sync: ${collected.length} variants from selected videos`, + ) + + // Pre-load caches + const languageMap = await buildGatewayIdMap( + strapi, + "api::language.language", + "en", + ) + const videoMap = await buildGatewayIdMap(strapi, "api::video.video", "en") + const editionMap = new Map() + const muxMap = new Map() + + // Pre-pass: upsert editions and mux videos + for (const { variant } of collected) { + if (variant.videoEdition && !editionMap.has(variant.videoEdition.id)) { + try { + const { documentId } = await upsertByGatewayId( + strapi, + "api::video-edition.video-edition", + variant.videoEdition.id, + { name: variant.videoEdition.name ?? undefined }, + ) + editionMap.set(variant.videoEdition.id, documentId) + } catch (error) { + strapi.log.warn( + `[gateway-sync] Failed to upsert edition ${variant.videoEdition.id}: ${formatError(error)}`, + ) + } + } + if (variant.muxVideo && !muxMap.has(variant.muxVideo.id)) { + try { + const { documentId } = await upsertByGatewayId( + strapi, + "api::mux-video.mux-video", + variant.muxVideo.id, + { + assetId: variant.muxVideo.assetId ?? undefined, + playbackId: variant.muxVideo.playbackId ?? 
undefined, + }, + ) + muxMap.set(variant.muxVideo.id, documentId) + } catch (error) { + strapi.log.warn( + `[gateway-sync] Failed to upsert mux video ${variant.muxVideo.id}: ${formatError(error)}`, + ) + } + } + } + + // Upsert variants + for (const { variant, videoGatewayId } of collected) { + const videoDocId = videoMap.get(videoGatewayId) + if (!videoDocId) { + stats.errors++ + continue + } + + try { + const langDocId = languageMap.get(variant.language.id) + const editionDocId = variant.videoEdition + ? editionMap.get(variant.videoEdition.id) + : undefined + const muxDocId = variant.muxVideo + ? muxMap.get(variant.muxVideo.id) + : undefined + + const downloads = variant.downloads.map((dl) => ({ + quality: dl.quality, + size: dl.size, + height: dl.height, + width: dl.width, + bitrate: dl.bitrate, + url: dl.url, + })) + + const { action } = await upsertByGatewayId( + strapi, + "api::video-variant.video-variant", + variant.id, + { + slug: variant.slug ?? undefined, + duration: variant.duration, + lengthInMilliseconds: variant.lengthInMilliseconds, + hls: variant.hls ?? undefined, + dash: variant.dash ?? undefined, + share: variant.share ?? undefined, + downloadable: variant.downloadable, + published: variant.published, + brightcoveId: variant.brightcoveId ?? 
undefined, + language: localizedRelation(langDocId), + videoEdition: clearableRelation(editionDocId), + muxVideo: clearableRelation(muxDocId), + video: localizedRelation(videoDocId), + downloads, + }, + ) + + if (action === "created") stats.created++ + else if (action === "updated") stats.updated++ + } catch (error) { + stats.errors++ + strapi.log.warn( + `[gateway-sync] Failed to upsert variant ${variant.id}: ${formatError(error)}`, + ) + } + } + + // No soft-delete for limited imports + strapi.log.info( + `[gateway-sync] Limited variant sync complete: ${stats.created} created, ${stats.updated} updated, ${stats.errors} errors (soft-delete skipped)`, + ) + + return stats +} diff --git a/apps/cms/src/api/gateway-sync/services/sync-videos.ts b/apps/cms/src/api/gateway-sync/services/sync-videos.ts index 75583166..4091c66f 100644 --- a/apps/cms/src/api/gateway-sync/services/sync-videos.ts +++ b/apps/cms/src/api/gateway-sync/services/sync-videos.ts @@ -1,7 +1,9 @@ import type { Core } from "@strapi/strapi" import type { ResultOf } from "@graphql-typed-document-node/core" +import { gql } from "@apollo/client/core" import { getGatewayClient } from "./gateway-client" import { graphql } from "../gql" +import type { SyncSelection } from "./gateway-sync" import { type SyncStats, docs, @@ -12,6 +14,7 @@ import { softDeleteUnseen, buildGatewayIdMap, clearableRelation, + localizedRelation, } from "./strapi-helpers" const DEFAULT_PAGE_SIZE = 100 @@ -155,6 +158,183 @@ const VIDEOS_QUERY = graphql(/* GraphQL */ ` type GatewayVideo = ResultOf["videos"][number] +/** + * Selected-video query for limited imports. + * Fetches videos by ID with nested variants so we don't need a separate variant crawl. + * Uses `gql` from Apollo directly since this query shape is unique to limited imports. + */ +const SELECTED_VIDEOS_QUERY = gql` + query SelectedVideos($ids: [ID!]!) 
{ + videos(where: { ids: $ids, published: true }, limit: 2000) { + id + slug + label + publishedAt + primaryLanguageId + locked + noIndex + source + origin { + id + name + description + } + title { + id + value + primary + language { + id + } + } + description { + id + value + primary + language { + id + } + } + snippet { + id + value + primary + language { + id + } + } + studyQuestions { + id + value + primary + order + language { + id + } + } + imageAlt { + id + value + primary + language { + id + } + } + bibleCitations { + id + osisId + chapterStart + chapterEnd + verseStart + verseEnd + order + bibleBook { + id + osisId + } + } + keywords { + id + } + images { + id + aspectRatio + mobileCinematicHigh + mobileCinematicLow + mobileCinematicVeryLow + thumbnail + videoStill + blurhash + url + } + subtitles { + id + primary + vttSrc + srtSrc + value + language { + id + } + videoEdition { + id + name + } + } + children { + id + } + variants { + id + slug + duration + lengthInMilliseconds + hls + dash + share + downloadable + published + brightcoveId + language { + id + } + videoEdition { + id + name + } + muxVideo { + id + assetId + playbackId + } + downloads { + id + quality + size + height + width + bitrate + url + } + } + } + } +` + +/** Variant shape nested inside the selected-videos query */ +export type SelectedVideoVariant = { + id: string + slug: string | null + duration: number + lengthInMilliseconds: number + hls: string | null + dash: string | null + share: string | null + downloadable: boolean + published: boolean + brightcoveId: string | null + language: { id: string } + videoEdition: { id: string; name: string | null } | null + muxVideo: { + id: string + assetId: string | null + playbackId: string | null + } | null + downloads: Array<{ + id: string + quality: string + size: number + height: number + width: number + bitrate: number + url: string + }> +} + +type SelectedVideo = GatewayVideo & { + variants: SelectedVideoVariant[] +} + async function 
syncSingleVideo( strapi: Core.Strapi, video: GatewayVideo, @@ -228,7 +408,7 @@ async function syncSingleVideo( { value: sq.value, order: sq.order, - video: { connect: [videoDocId] }, + video: localizedRelation(videoDocId), }, { locale: "en" }, ) @@ -254,8 +434,8 @@ async function syncSingleVideo( verseStart: bc.verseStart ?? undefined, verseEnd: bc.verseEnd ?? undefined, order: bc.order, - bibleBook: clearableRelation(bookDocId), - video: { connect: [videoDocId] }, + bibleBook: localizedRelation(bookDocId), + video: localizedRelation(videoDocId), }, ) } catch (error) { @@ -277,7 +457,6 @@ async function syncSingleVideo( documentId: videoDocId, data: { keywords: keywordDocIds }, locale: "en", - status: "published", }) } catch (error) { strapi.log.warn( @@ -325,9 +504,9 @@ async function syncSingleVideo( srtSrc: subtitle.srtSrc ?? undefined, value: subtitle.value, edition: subtitle.videoEdition?.name ?? undefined, - language: clearableRelation(langDocId), + language: localizedRelation(langDocId), videoEdition: clearableRelation(editionDocId), - video: { connect: [videoDocId] }, + video: localizedRelation(videoDocId), }, ) } catch (error) { @@ -344,37 +523,42 @@ async function syncSingleVideo( : "skipped" } -export async function syncVideos(strapi: Core.Strapi): Promise { +/** + * Full sync: paginate all published videos from gateway. + * Limited sync: fetch only the resolved selected video IDs. + */ +export async function syncVideos( + strapi: Core.Strapi, + selection: SyncSelection, +): Promise { const stats: SyncStats = { created: 0, updated: 0, softDeleted: 0, errors: 0 } const pageSize = getPageSize() - strapi.log.info("[gateway-sync] Starting video sync") + strapi.log.info( + `[gateway-sync] Starting video sync (${selection.isFullSync ? 
"full" : "limited"})`, + ) - // Get total count from gateway for comparison - let gatewayTotal = 0 + // First pass: sync all BibleBooks (needed before bible citations) try { - const { data: countData } = await getGatewayClient().query({ - query: VIDEOS_COUNT_QUERY, + const { data: bibleData, error } = await getGatewayClient().query({ + query: BIBLE_BOOKS_QUERY, }) - gatewayTotal = countData.videosCount - strapi.log.info( - `[gateway-sync] Gateway reports ${gatewayTotal} published videos`, - ) - } catch (error) { - strapi.log.warn( - `[gateway-sync] Failed to fetch video count: ${formatError(error)}`, - ) - } - // First pass: sync all BibleBooks (needed before bible citations) - try { - const bibleData = ( - await getGatewayClient().query({ query: BIBLE_BOOKS_QUERY }) - ).data + if (error) { + strapi.log.warn( + `[gateway-sync] Bible books query returned errors: ${error.message}`, + ) + } + + const bibleBooks = bibleData?.bibleBooks + if (!bibleBooks || bibleBooks.length === 0) { + throw new Error("Gateway returned no bible books") + } + strapi.log.info( - `[gateway-sync] Fetched ${bibleData.bibleBooks.length} bible books from gateway`, + `[gateway-sync] Fetched ${bibleBooks.length} bible books from gateway`, ) - for (const book of bibleData.bibleBooks) { + for (const book of bibleBooks) { const primaryName = getPrimaryValue(book.name) await upsertByGatewayId( strapi, @@ -415,6 +599,44 @@ export async function syncVideos(strapi: Core.Strapi): Promise { `[gateway-sync] Loaded caches: ${languageMap.size} languages, ${bibleBookMap.size} bible books, ${keywordMap.size} keywords`, ) + const caches = { originMap, languageMap, bibleBookMap, keywordMap } + + // Branch: limited vs full sync + if (!selection.isFullSync) { + return syncVideosLimited(strapi, selection, stats, caches) + } + + return syncVideosFull(strapi, stats, pageSize, caches) +} + +/** Full sync: paginate all published videos, soft-delete unseen */ +async function syncVideosFull( + strapi: Core.Strapi, + 
stats: SyncStats, + pageSize: number, + caches: { + originMap: Map + languageMap: Map + bibleBookMap: Map + keywordMap: Map + }, +): Promise { + // Get total count from gateway for comparison + let gatewayTotal = 0 + try { + const { data: countData } = await getGatewayClient().query({ + query: VIDEOS_COUNT_QUERY, + }) + gatewayTotal = countData.videosCount + strapi.log.info( + `[gateway-sync] Gateway reports ${gatewayTotal} published videos`, + ) + } catch (error) { + strapi.log.warn( + `[gateway-sync] Failed to fetch video count: ${formatError(error)}`, + ) + } + const seenVideoIds = new Set() const seenSubtitleIds = new Set() let offset = 0 @@ -450,7 +672,7 @@ export async function syncVideos(strapi: Core.Strapi): Promise { // Pre-pass: upsert all VideoOrigins from this page (hoisted map persists across pages) for (const video of videos) { - if (video.origin && !originMap.has(video.origin.id)) { + if (video.origin && !caches.originMap.has(video.origin.id)) { try { const { documentId } = await upsertByGatewayId( strapi, @@ -461,7 +683,7 @@ export async function syncVideos(strapi: Core.Strapi): Promise { description: video.origin.description ?? 
undefined, }, ) - originMap.set(video.origin.id, documentId) + caches.originMap.set(video.origin.id, documentId) } catch (error) { strapi.log.warn( `[gateway-sync] Failed to upsert video origin ${video.origin.id}: ${formatError(error)}`, @@ -475,12 +697,7 @@ export async function syncVideos(strapi: Core.Strapi): Promise { for (const s of video.subtitles) seenSubtitleIds.add(s.id) try { - const result = await syncSingleVideo(strapi, video, { - originMap, - languageMap, - bibleBookMap, - keywordMap, - }) + const result = await syncSingleVideo(strapi, video, caches) if (result === "created") stats.created++ else if (result === "updated") stats.updated++ } catch (error) { @@ -529,3 +746,145 @@ export async function syncVideos(strapi: Core.Strapi): Promise { return stats } + +/** + * Limited sync: fetch only selected videos by ID, including nested variants. + * No soft-delete — limited imports are always additive. + * Returns sync stats; collected variant data is stored on the selection for the variant phase to consume. + */ +/** Batch size for SELECTED_VIDEOS_QUERY to stay under gateway complexity limits */ +const SELECTED_VIDEOS_BATCH_SIZE = 10 + +async function fetchSelectedVideosBatched( + strapi: Core.Strapi, + ids: string[], +): Promise { + const allVideos: SelectedVideo[] = [] + const batches: string[][] = [] + + for (let i = 0; i < ids.length; i += SELECTED_VIDEOS_BATCH_SIZE) { + batches.push(ids.slice(i, i + SELECTED_VIDEOS_BATCH_SIZE)) + } + + strapi.log.info( + `[gateway-sync] Fetching ${ids.length} selected videos in ${batches.length} batches of up to ${SELECTED_VIDEOS_BATCH_SIZE}`, + ) + + for (let i = 0; i < batches.length; i++) { + const batch = batches[i] + const { data, error } = await getGatewayClient().query<{ + videos: SelectedVideo[] + }>({ + query: SELECTED_VIDEOS_QUERY, + variables: { ids: batch }, + }) + + if (error) { + strapi.log.warn( + `[gateway-sync] Batch ${i + 1}/${batches.length} returned errors: ${error.message}`, + ) + } + + const videos = data?.videos + if (videos) { + 
allVideos.push(...videos) + } else { + strapi.log.error( + `[gateway-sync] Batch ${i + 1}/${batches.length} returned null data — skipping ${batch.length} IDs`, + ) + } + } + + return allVideos +} + +async function syncVideosLimited( + strapi: Core.Strapi, + selection: SyncSelection, + stats: SyncStats, + caches: { + originMap: Map + languageMap: Map + bibleBookMap: Map + keywordMap: Map + }, +): Promise { + strapi.log.info( + `[gateway-sync] Fetching ${selection.resolvedVideoIds.length} selected videos from gateway`, + ) + + let videos: SelectedVideo[] + try { + videos = await fetchSelectedVideosBatched( + strapi, + selection.resolvedVideoIds, + ) + } catch (error) { + strapi.log.error( + `[gateway-sync] Failed to fetch selected videos: ${formatError(error)}`, + ) + return stats + } + + strapi.log.info( + `[gateway-sync] Gateway returned ${videos.length} of ${selection.resolvedVideoIds.length} requested videos`, + ) + + // Pre-pass: upsert all VideoOrigins + for (const video of videos) { + if (video.origin && !caches.originMap.has(video.origin.id)) { + try { + const { documentId } = await upsertByGatewayId( + strapi, + "api::video-origin.video-origin", + video.origin.id, + { + name: video.origin.name, + description: video.origin.description ?? 
undefined, + }, + ) + caches.originMap.set(video.origin.id, documentId) + } catch (error) { + strapi.log.warn( + `[gateway-sync] Failed to upsert video origin ${video.origin.id}: ${formatError(error)}`, + ) + } + } + } + + // Collect variant data for the variant sync phase + const collectedVariants: Array<{ + variant: SelectedVideoVariant + videoGatewayId: string + }> = [] + + for (const video of videos) { + try { + const result = await syncSingleVideo(strapi, video, caches) + if (result === "created") stats.created++ + else if (result === "updated") stats.updated++ + + // Collect variants from this video for the variant phase + for (const variant of video.variants) { + collectedVariants.push({ variant, videoGatewayId: video.id }) + } + } catch (error) { + stats.errors++ + strapi.log.warn( + `[gateway-sync] Failed to sync video ${video.id}: ${formatError(error)}`, + ) + } + } + + // Store collected variants on the selection for the variant phase to consume + ;( + selection as SyncSelection & { _collectedVariants?: unknown } + )._collectedVariants = collectedVariants + + // No soft-delete for limited imports + strapi.log.info( + `[gateway-sync] Limited video sync complete: ${stats.created} created, ${stats.updated} updated, ${stats.errors} errors (soft-delete skipped)`, + ) + + return stats +} diff --git a/docs/plans/2026-03-23-001-feat-staging-cms-collection-seed-import-plan.md b/docs/plans/2026-03-23-001-feat-staging-cms-collection-seed-import-plan.md new file mode 100644 index 00000000..912d509a --- /dev/null +++ b/docs/plans/2026-03-23-001-feat-staging-cms-collection-seed-import-plan.md @@ -0,0 +1,394 @@ +--- +title: "feat: Staging CMS Collection Seed Import" +type: feat +status: completed +date: 2026-03-23 +origin: docs/brainstorms/2026-03-19-cms-gateway-sync-requirements.md +--- + +# Staging CMS Collection Seed Import + +## Overview + +Extend the existing Strapi `gateway-sync` pipeline so staging and local environments can seed a limited, operator-selected 
slice of gateway media data into CMS. The operator supplies coverage-style collection IDs and/or explicit video IDs, and the sync resolves them into the concrete video graph automatically before importing only that subset. + +This keeps `apps/cms` as the canonical source of content while avoiding a full gateway import just to unblock `apps/manager` development. + +## Problem Statement / Motivation + +The current gateway sync in `apps/cms` is optimized for full synchronization: + +- `POST /gateway-sync/trigger` accepts only `scope` +- `sync-videos.ts` paginates across all published videos +- `sync-video-variants.ts` paginates across all variants +- soft-delete passes assume a full authoritative crawl + +That does not match the current staging need: + +- CMS is still effectively empty for manager development +- asking an operator to manually discover every child video ID is too expensive +- manager coverage semantics are based on top-level labeled videos plus `children`, not on a separate collection content type +- limited seed imports must be additive and safe, not globally destructive + +The old VideoForge coverage flow already established the grouping model: a “collection” is the top-level video row returned by the gateway coverage query, and the actual selectable media are the child video IDs, or the top-level video itself when no children exist. + +## Proposed Solution + +Extend the existing admin-authenticated gateway sync trigger to accept optional selection inputs: + +- `collectionIds?: string[]` +- `videoIds?: string[]` +- `dryRun?: boolean` + +Behavior is inferred, not declared: + +- if both `collectionIds` and `videoIds` are empty, run the existing full sync behavior +- if either list has values, run a limited seed import + +In limited runs: + +1. Resolve each supplied collection ID into an array of video IDs +2. Union that result with any explicit `videoIds` +3. Dedupe into the final selected video set +4. 
Import only that selected video graph into Strapi +5. Skip global soft-delete behavior + +The reusable core is a collection-to-video transformer function in the CMS gateway-sync layer. The trigger becomes the staging-friendly entry point, but the resolution logic remains reusable for later CLI or admin UI affordances. + +## Technical Approach + +### Collection semantics + +This plan treats **collection IDs as coverage-style collection IDs**, which in practice are the gateway IDs of top-level labeled videos. They are **not** `JourneyCollection.id` values from the separate journeys graph. + +Resolution rule: + +- if the selected top-level video has one or more `children`, the collection expands to those child video IDs +- if the selected top-level video has no `children`, the collection expands to its own ID + +### Public interface changes + +#### `apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts` + +Extend the trigger body from: + +```ts +// apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts +type TriggerBody = { + scope?: string | string[] +} +``` + +to: + +```ts +// apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts +type TriggerBody = { + scope?: string | string[] + collectionIds?: string[] + videoIds?: string[] + dryRun?: boolean +} +``` + +Example request: + +```json +{ + "scope": ["languages", "countries", "keywords", "videos", "video-variants"], + "collectionIds": ["top-level-video-a", "top-level-video-b"], + "videoIds": ["standalone-video-c"], + "dryRun": true +} +``` + +Example dry-run response: + +```json +{ + "message": "Gateway sync dry run complete", + "isFullSync": false, + "requestedCollectionIds": ["top-level-video-a", "top-level-video-b"], + "requestedVideoIds": ["standalone-video-c"], + "collectionVideoIds": { + "top-level-video-a": ["child-1", "child-2"], + "top-level-video-b": ["top-level-video-b"] + }, + "resolvedVideoIds": [ + "child-1", + "child-2", + "top-level-video-b", + "standalone-video-c" + ], + 
"missingCollectionIds": [], + "phases": ["languages", "countries", "keywords", "videos", "video-variants"] +} +``` + +### New internal selection contract + +Add a shared selection context inside the gateway-sync service layer: + +```ts +// apps/cms/src/api/gateway-sync/services/gateway-sync.ts +type SyncSelection = { + collectionIds: string[] + videoIds: string[] + resolvedVideoIds: string[] + collectionVideoIds: Record + missingCollectionIds: string[] + isFullSync: boolean + dryRun: boolean +} +``` + +This object is passed to the video-related phases. Languages, countries, and keywords can ignore it. + +### New collection-to-video transformer + +Add a reusable resolver in the gateway-sync service layer: + +```ts +// apps/cms/src/api/gateway-sync/services/resolve-collection-video-ids.ts +export type ResolveCollectionVideoIdsInput = { + collectionIds: string[] +} + +export type ResolveCollectionVideoIdsResult = { + collectionVideoIds: Record + resolvedVideoIds: string[] + missingCollectionIds: string[] +} +``` + +Implementation details: + +- query gateway `videos(where: { ids: [...] })` +- request: + - `id` + - `label` + - `children { id }` +- only coverage-style top-level IDs are expected +- map each returned row to: + - `children[].id` when children exist + - `[row.id]` when no children exist +- compute `missingCollectionIds` from requested IDs not returned by the gateway +- flatten and dedupe into `resolvedVideoIds` + +Recommended GraphQL document: + +```graphql +# apps/cms/src/api/gateway-sync/services/resolve-collection-video-ids.ts +query ResolveCollectionVideoIds($ids: [ID!]!) 
{ + videos(where: { ids: $ids, published: true }, limit: 2000) { + id + label + children { + id + } + } +} +``` + +### Limited sync execution model + +#### Full sync + +Preserve the current behavior: + +- `sync-videos.ts` paginates all published videos +- `sync-video-variants.ts` paginates all variants +- soft-delete passes remain enabled + +#### Limited sync + +Infer limited mode from `resolvedVideoIds.length > 0`. + +In limited mode: + +- `sync-videos.ts` must fetch only selected videos from the gateway, not all published videos +- `sync-video-variants.ts` must sync only variants belonging to the selected videos +- global `softDeleteUnseen()` passes must be skipped + +Because the generated gateway schema supports `videos(where: { ids: [...] })` and nested `variants`, but does **not** expose a `videoVariants` query filtered by video IDs, the limited implementation should avoid a global variant crawl. + +Instead: + +1. Add a dedicated selected-videos query in `sync-videos.ts` that fetches: + - all current video fields needed by the video sync + - nested `variants` with the fields currently used by `sync-video-variants.ts` +2. Reuse a shared helper to upsert variants from an in-memory batch derived from the selected video rows +3. Keep the existing paginated `videoVariants` path only for full syncs + +This yields efficient limited imports without changing the full-sync strategy. 
+ +### Phase-level behavior + +#### Languages + +- continue current full sync behavior in both full and limited runs +- no special selection logic +- keep current locale registration behavior + +#### Countries + +- continue current full sync behavior in both full and limited runs +- no selection logic + +#### Keywords + +- continue current full sync behavior in both full and limited runs +- keeps manager metadata and coverage displays usable without partial keyword gaps + +#### Videos + +- full run: existing paginated `published: true` crawl +- limited run: selected-video query using resolved ID list +- preserve existing upsert-by-gateway-ID behavior +- preserve `Video.label`, `childGatewayIds`, subtitles, bible citations, study questions, images, origins, and keyword relations + +#### Video variants + +- full run: existing paginated `videoVariants` crawl +- limited run: flatten nested `video.variants` from the selected-video query +- preserve edition and mux-video pre-pass behavior +- keep using the current Strapi relation-clearing pattern via `clearableRelation()` + +### Destructive-safety rules + +Limited imports are always additive and non-destructive. + +That means: + +- no `softDeleteUnseen()` during limited runs +- no cleanup based on “not seen in this run” +- existing `source: "manager"` protection remains unchanged +- rerunning the same limited seed import must remain idempotent + +### Environment and rollout guard + +This workflow is primarily for staging and local environments. Add an env guard such as: + +- `GATEWAY_SYNC_ENABLE_LIMITED_IMPORT=true` + +Behavior: + +- full sync remains available everywhere +- limited seed import request bodies are rejected unless the env guard is enabled + +This prevents accidental production use while keeping staging simple. 
+ +### Operator flow + +```mermaid +flowchart TD + A["Operator identifies coverage-style collection IDs"] --> B["POST /api/gateway-sync/trigger with collectionIds, videoIds?, dryRun?"] + B --> C["Resolve collection IDs to child/self video IDs"] + C --> D{"dryRun?"} + D -->|yes| E["Return collectionVideoIds, resolvedVideoIds, missingCollectionIds"] + D -->|no| F["Run selected sync phases"] + F --> G["Upsert CMS videos, subtitles, variants, relations"] + G --> H["Manager follow-up can read CMS as canonical source"] +``` + +## Implementation Phases + +### Phase 1: Trigger and selection plumbing + +- [ ] Extend `gateway-sync.trigger` request body to accept `collectionIds`, `videoIds`, and `dryRun` +- [ ] Add a `SyncSelection` contract in `services/gateway-sync.ts` +- [ ] Infer `isFullSync` from empty vs non-empty selection lists +- [ ] Reject limited imports when the staging/local env guard is disabled +- [ ] Preserve current background execution for non-dry-run imports + +### Phase 2: Collection transformer + +- [ ] Create `services/resolve-collection-video-ids.ts` +- [ ] Add typed GraphQL query for top-level video lookup by ID +- [ ] Map returned rows to `collectionVideoIds` +- [ ] Return `resolvedVideoIds` and `missingCollectionIds` +- [ ] Unit test child-expansion, self-expansion, dedupe, and missing-ID behavior + +### Phase 3: Limited video sync + +- [ ] Add selected-video query path to `sync-videos.ts` +- [ ] Share video upsert logic across full and limited runs +- [ ] Flatten nested variants from selected videos into a reusable batch shape +- [ ] Extract shared variant upsert helper from `sync-video-variants.ts` +- [ ] Skip `softDeleteUnseen()` when running limited imports + +### Phase 4: Dry run and observability + +- [ ] Add dry-run controller response with resolved IDs and missing IDs +- [ ] Include `isFullSync` and selected phase list in the response +- [ ] Log when a run is full vs limited +- [ ] Log collection resolution mismatches and missing collection 
IDs clearly + +### Phase 5: Validation against manager needs + +- [ ] Verify imported videos preserve `label` + `children` semantics expected by the legacy coverage model +- [ ] Verify imported variants retain Mux asset mappings needed for selection/submission flows +- [ ] Document that this plan seeds CMS only; the manager-side `/dashboard/coverage` data-source rewrite remains a separate implementation step + +## Acceptance Criteria + +- [ ] Admin trigger accepts `collectionIds`, `videoIds`, and `dryRun` +- [ ] Empty selection inputs still run the existing full sync behavior +- [ ] Supplying coverage-style collection IDs resolves to concrete child/self video IDs automatically +- [ ] Supplying both collection IDs and explicit video IDs produces a deduped union +- [ ] Dry run returns the resolved mapping without writing CMS data +- [ ] Limited imports do not soft-delete unrelated CMS content +- [ ] Re-running the same limited import is idempotent +- [ ] Selected videos import with the same gateway-backed fields used by manager coverage: labels, child IDs, subtitles, variants, images, keywords, study questions, and bible citations +- [ ] Limited import can be enabled on staging without changing full-sync behavior elsewhere + +## Success Metrics + +- A staging operator can seed a small CMS dataset from a handful of collection IDs without manually sourcing every video ID +- A dry run makes it obvious which requested collections mapped successfully and which did not +- The limited import path does not perform a full gateway crawl for videos or variants +- Seeded CMS content matches the legacy coverage grouping model closely enough for manager follow-up work + +## Dependencies & Risks + +### Dependencies + +- Existing `apps/cms/src/api/gateway-sync/` service structure +- Gateway `videos(where: { ids: [...] 
})` support +- Nested `Video.children` and `Video.variants` availability in the gateway schema +- Admin-authenticated `POST /gateway-sync/trigger` + +### Risks + +- **Wrong collection ID type**: if operators supply `JourneyCollection.id` values instead of coverage-style top-level video IDs, resolution will fail. The trigger contract and docs must state the expected ID type clearly. +- **Gateway payload drift**: if the selected-video query omits fields currently relied on by the full sync helpers, limited imports will produce incomplete CMS records. +- **Relation clearing regressions**: variant and subtitle upserts must keep the existing Strapi v5 `{ set: [] }` semantics via `clearableRelation()` when source relations are missing. +- **Large operator selection**: a very large `collectionIds` list could behave like a near-full import. Request validation should cap total IDs per run. + +## References & Research + +### Internal References + +- `docs/brainstorms/2026-03-19-cms-gateway-sync-requirements.md` +- `docs/plans/2026-03-19-001-feat-cms-gateway-data-sync-plan.md` +- `apps/cms/src/api/gateway-sync/services/gateway-sync.ts` +- `apps/cms/src/api/gateway-sync/controllers/gateway-sync.ts` +- `apps/cms/src/api/gateway-sync/services/sync-videos.ts` +- `apps/cms/src/api/gateway-sync/services/sync-video-variants.ts` +- `apps/manager/src/app/dashboard/coverage/page.tsx` + +### Relevant Learnings + +- `docs/solutions/integration-issues/strapi-v5-manytone-relation-clearing.md` + - keep using `{ set: [] }` via `clearableRelation()` for missing Strapi many-to-one relations +- `docs/solutions/platform/restoring-upstream-ui-verbatim.md` + - preserve legacy coverage semantics instead of inventing a new grouping model during migration +- `apps/manager/CLAUDE.md` + - current coverage is still blocked on replacing file-based job-state assumptions with a durable CMS-backed source + +## Out of Scope + +- Rewriting `apps/manager` coverage page in this work item +- Importing journeys 
or `JourneyCollection` records +- Real-time sync, webhook sync, or bidirectional updates back to the gateway +- Building a Strapi admin UI button beyond the existing admin-authenticated trigger endpoint diff --git a/docs/plans/2026-03-25-001-fix-cms-gateway-sync-video-child-publishing-plan.md b/docs/plans/2026-03-25-001-fix-cms-gateway-sync-video-child-publishing-plan.md new file mode 100644 index 00000000..fa8c8fb1 --- /dev/null +++ b/docs/plans/2026-03-25-001-fix-cms-gateway-sync-video-child-publishing-plan.md @@ -0,0 +1,217 @@ +--- +title: "fix: CMS Gateway Sync Video Child Publishing" +type: fix +status: in_progress +date: 2026-03-25 +origin: docs/brainstorms/2026-03-19-cms-gateway-sync-requirements.md +--- + +# CMS Gateway Sync Video Child Publishing + +## Overview + +Fix the Strapi v5 draft/publish regression in `apps/cms` so a limited gateway import can reliably bring in collection `7_0-nfs01`, its parent video, its 3 episode videos, and all related child content without leaving support records draft-only or depending on SQL link-table repair. + +The proper fix is to keep the importer inside Strapi's Document Service model, publish records in dependency order, and make create and update paths use the same draft-first flow. 
+ +## Problem Statement / Motivation + +Current local testing shows the importer is still not reliable for relation-heavy child types: + +- base `video` records can import and publish successfully +- `video-subtitle`, `video-variant`, `bible-citation`, and `video-study-question` can still fail publish with `1 relation(s) of type api::video.video associated with this entity do not exist` +- the current code mixes Document Service writes with low-level `strapi.db.query(...)` reads and raw Knex link-table rewrites, which bypasses Strapi's document model and makes failures harder to reason about +- the update path is not symmetrical with the create path: new records are created as drafts, but existing gateway-owned records are still updated with `status: "published"` inline +- the current bug writeup in `docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md` says `docs.publish({ documentId })` alone is sufficient, but the current local behavior on Strapi `5.36.0` contradicts that assumption + +This blocks trustworthy local and staging seed imports and creates false-positive "successful import" reports when child content remains unpublished. + +## Proposed Solution + +Refactor gateway sync so all gateway-owned upserts follow a single document-safe flow: + +1. sync lookup and reference content as drafts +2. sync base video documents as drafts +3. publish reference types and videos +4. sync child content that depends on published videos +5. publish child content +6. rerun the same staged flow for updates, without special SQL repair paths + +This keeps relation writes inside the Document Service, removes row-id repair logic, and makes publish order explicit instead of relying on a final bulk sweep to recover from earlier draft-only writes. 
+ +## Implementation Notes + +The current implementation reached the clean-import acceptance criteria without extracting a separate +base-video/child-content phase inside `sync-videos.ts`, but warm reruns are still open: + +- make gateway-owned creates and updates draft-first +- normalize `video` to a direct many-to-one document relation +- remove SQL join-table repair +- make publish discovery operate on document IDs that still have no published row +- fail the sync when a publish stage has real failures + +## Technical Approach + +### Flow changes + +```mermaid +flowchart TD + A["Trigger limited import for 7_0-nfs01"] --> B["Resolve parent + child video IDs"] + B --> C["Sync reference types as drafts"] + C --> D["Sync base video documents as drafts"] + D --> E["Publish reference types + videos"] + E --> F["Sync child entities that point at video"] + F --> G["Publish child entities"] + G --> H["Return success only if all required publish stages pass"] +``` + +## Implementation Phases + +### Phase 1: Normalize gateway upsert semantics + +- Change `upsertByGatewayId()` in `apps/cms/src/api/gateway-sync/services/strapi-helpers.ts` so gateway-owned records update as drafts too, not with inline `status: "published"`. +- Keep manager-owned records skipped exactly as today. +- Keep `publishDrafts()` as the single publish entry point for gateway-owned records. +- Change `publishDrafts()` to return structured results instead of a bare count: + - `published` + - `failed` + - `failedDocumentIds` +- Treat publish failures for required content types as sync failures, not warning-only noise. + +### Phase 2: Remove mixed relation strategies + +- Remove `repairVideoChildRelationLinks()` and any raw Knex writes to `*_lnk` tables from `apps/cms/src/api/gateway-sync/services/strapi-helpers.ts`. +- Keep entity-level `strapi.db.query(...)` only for discovering draft rows; do not use it to mutate relations. 
+- Normalize many-to-one relation payloads to the document-safe single-value form: + - `video: videoDocId` + - `language: clearableRelation(langDocId)` + - `videoEdition: clearableRelation(editionDocId)` + - `muxVideo: clearableRelation(muxDocId)` + - `bibleBook: clearableRelation(bookDocId)` +- Reserve `connect` only for actual multi-relation fields. + +### Phase 3: Split base-video sync from child-content sync + +- Refactor `apps/cms/src/api/gateway-sync/services/sync-videos.ts` so base video upsert is separated from child entity upserts. +- Base video sync should own: + - core scalar fields + - origin + - primary language + - images + - child gateway IDs + - keyword relation update +- Child-content sync should own: + - study questions + - bible citations + - subtitles +- Child-content sync must run only after the relevant `video` documents have been published. +- Keep the existing "include the parent collection video in resolved IDs" behavior for limited imports. + +### Phase 4: Make stage ordering explicit in the orchestrator + +- Replace the current single end-of-run `CONTENT_TYPES_TO_PUBLISH` sweep in `apps/cms/src/api/gateway-sync/services/gateway-sync.ts` with stage-scoped publish steps. +- Use this publish order: + 1. reference and lookup types: `continent`, `language`, `country`, `country-language`, `keyword`, `bible-book`, `video-origin`, `video-edition`, `mux-video` + 2. `video` + 3. child types: `video-subtitle`, `video-variant`, `bible-citation`, `video-study-question` +- Fail the sync result if a required publish stage has any failures. +- Keep limited import scope and dry-run behavior unchanged. + +### Phase 5: Make reruns first-class + +- Ensure the second import of the same collection uses the same draft-first, publish-later flow for updated documents. +- Do not preserve the current split behavior where create paths rely on `publishDrafts()` but update paths publish inline. 
+- Keep relation-clearing behavior from `docs/solutions/integration-issues/strapi-v5-manytone-relation-clearing.md`: + - use `{ set: [] }` for missing optional many-to-one relations + - never pass `null` + - do not preserve stale relation references via `undefined` + +## Public Interfaces / Internal Contracts + +No external API changes are required for `POST /api/gateway-sync/trigger` or the limited import request shape. + +Internal service contracts should change as follows: + +- `publishDrafts(strapi, uid)` returns a structured result instead of `number` +- `sync-videos.ts` exposes separate helpers for base video sync and child-content sync +- gateway sync treats publish failures as first-class sync errors + +## Acceptance Criteria + +- [x] Fresh local import of collection `7_0-nfs01` completes without `api::video.video do not exist` publish errors. +- [x] The imported set includes exactly 4 `Video` documents: parent `7_0-nfs01` plus `7_0-nfs0101`, `7_0-nfs0102`, and `7_0-nfs0103`. +- [x] Parent video `7_0-nfs01` is published and its `children` relation shows the 3 episode videos in Strapi admin. +- [x] `video-subtitle`, `video-variant`, `bible-citation`, and `video-study-question` all have published rows after the run, not draft-only rows. +- [ ] Re-running the same limited import on a warm DB completes without child publish regressions. +- [x] The importer no longer writes directly to `*_lnk` tables anywhere in the gateway-sync path. +- [x] A publish-stage failure increments sync errors and fails the run instead of reporting a false success. + +## Validation Plan + +### Required manual regression + +- Reset the local CMS DB. +- Run the existing local testing flow from `docs/solutions/cms/gateway-sync-local-testing.md`. +- Trigger a limited import for `7_0-nfs01`. 
+- Verify in Strapi admin: + - `Video` shows the 4 expected records + - the parent video is published + - each child-supporting type has published records +- Verify in SQLite or DB inspection: + - published rows exist for `video_subtitles`, `video_variants`, `bible_citations`, and `video_study_questions` + - no required type has `0` published rows after the run +- Repeat the same import without wiping the DB and confirm the update path remains clean. + +### Observability checks + +- Logs must show publish counts and failures per stage. +- Status output must reflect publish-stage failures in the final result, not just warn in the background. + +## Dependencies & Risks + +### Dependencies + +- Existing gateway-sync structure under `apps/cms/src/api/gateway-sync/` +- Existing limited-import resolution behavior that already includes the parent collection video +- Existing local validation runbook in `docs/solutions/cms/gateway-sync-local-testing.md` + +### Risks + +- **Strapi 5.36.0 document-service bug persists even after staged publishing**: if the Document Service-only flow still fails after this refactor, stop adding importer workarounds and capture a minimal upstream reproduction instead. +- **Silent success regression**: if publish failures remain warning-only, operators will continue trusting incomplete imports. +- **Update-path drift**: fixing only fresh creates will leave reruns broken; the plan must unify create and update semantics. +- **Over-coupled video sync helper**: if `sync-videos.ts` is not actually split cleanly, child content may still be created before videos are publishable. + +## Alternative Approaches Considered + +- **Raw join-table repair**: rejected. It mixes row-level DB mutation with Strapi document writes and is exactly the incompatible strategy called out by current upstream guidance. +- **Disable Draft & Publish on child content types**: rejected for this fix. 
It changes editorial behavior and avoids the bug by changing the model rather than making the importer correct. +- **Large document middleware refactor**: deferred. Use a repo-local Document Service only fix first; only escalate to middleware work if the staged flow still reproduces the bug. + +## References & Research + +### Internal references + +- `docs/brainstorms/2026-03-19-cms-gateway-sync-requirements.md` +- `docs/plans/2026-03-19-001-feat-cms-gateway-data-sync-plan.md` +- `docs/plans/2026-03-23-001-feat-staging-cms-collection-seed-import-plan.md` +- `docs/solutions/cms/gateway-sync-local-testing.md` +- `docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md` +- `docs/solutions/integration-issues/strapi-v5-manytone-relation-clearing.md` +- `apps/cms/src/api/gateway-sync/services/strapi-helpers.ts` +- `apps/cms/src/api/gateway-sync/services/gateway-sync.ts` +- `apps/cms/src/api/gateway-sync/services/sync-videos.ts` +- `apps/cms/src/api/gateway-sync/services/sync-video-variants.ts` + +### External references + +- Strapi Document Service API: https://docs.strapi.io/cms/api/document-service +- Strapi relations docs: https://docs.strapi.io/cms/api/rest/relations +- Strapi issue `#24850` on relation handling with `documentId`: https://github.com/strapi/strapi/issues/24850 +- Strapi issue `#23460` on published relation breakage after republish: https://github.com/strapi/strapi/issues/23460 + +## Documentation Follow-up + +- Update `docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md` to remove the now-stale claim that a final `docs.publish({ documentId })` sweep alone solves child publishing. +- Add a short "clean run + rerun" verification section for `7_0-nfs01` to `docs/solutions/cms/gateway-sync-local-testing.md`. +- Record the final root cause and chosen staged-publish design once verified locally. 
diff --git a/docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md b/docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md new file mode 100644 index 00000000..3cccd61c --- /dev/null +++ b/docs/solutions/cms/gateway-sync-limited-import-testing-bugs.md @@ -0,0 +1,305 @@ +--- +title: "Gateway Sync Limited Import: Bugs Found During Local Testing" +category: cms +date: 2026-03-23 +tags: + [ + gateway-sync, + limited-import, + apollo-client, + query-complexity, + null-safety, + strapi-v5, + error-handling, + entity-validator, + draft-publish, + ] +symptoms: + - "TypeError: Cannot read properties of undefined (reading 'keywords') at syncKeywords" + - "TypeError: Cannot read properties of null (reading 'videos') at syncVideosLimited" + - "Gateway returns INTERNAL_SERVER_ERROR with data: null for large collections" + - "ValidationError: 1 relation(s) of type api::video-edition.video-edition associated with this entity do not exist" +component: gateway-sync +pr: "#514" +related: + - docs/solutions/cms/gateway-sync-local-testing.md + - docs/solutions/integration-issues/strapi-v5-manytone-relation-clearing.md +--- + +# Gateway Sync Limited Import: Bugs Found During Local Testing + +Discovered during local testing of the PR #514 limited seed import feature. Three bugs +found and all three resolved. 
+ +--- + +## Bug 1: Keywords Sync — Missing Null Guard on Apollo `data` (FIXED) + +### Symptom + +``` +[gateway-sync] Sync failed after 68.8s: TypeError: Cannot read properties of undefined (reading 'keywords') + at syncKeywords (sync-keywords.js:27:27) +``` + +### Root Cause + +`sync-keywords.ts` destructures `data` from the Apollo response and immediately accesses +`.keywords` without a null check: + +```ts +// sync-keywords.ts lines 35–36 +const { data } = await getGatewayClient().query({ query: KEYWORDS_QUERY }) +const keywords = data.keywords // 💥 crashes if data is undefined +``` + +With `errorPolicy: 'all'` configured on the Apollo client, when the gateway returns +GraphQL errors alongside `"data": null`, Apollo resolves to `{ data: undefined, errors: [...] }`. +The destructure succeeds but `data` is `undefined`, so `.keywords` throws. + +### Fix (Applied) + +Changed to use Apollo's `error` field (singular `ApolloError`, not `errors` array) and +null-guard `data` before accessing `.keywords`: + +```ts +const { data, error } = await getGatewayClient().query({ + query: KEYWORDS_QUERY, +}) + +if (error) { + strapi.log.warn( + `[gateway-sync] Keywords query returned errors: ${error.message}`, + ) +} + +const keywords = data?.keywords +if (!keywords || keywords.length === 0) { + // circuit breaker: skip sync +} +``` + +**Key learning:** Apollo Client `query()` returns `{ data, error }` where `error` is an +`ApolloError` (with `.graphQLErrors` array inside), **not** a top-level `errors` array. +TypeScript will reject `{ data, errors }` destructuring. 
+ +--- + +## Bug 2: Videos Limited Sync — Query Complexity Overflow for Large Collections (FIXED) + +### Symptom + +``` +[gateway-sync] Fetching 61 selected videos from gateway +[gateway-sync] Failed to fetch selected videos: TypeError: Cannot read properties of null (reading 'videos') + at syncVideosLimited (sync-videos.js:569:23) +[gateway-sync] Limited video sync complete: 0 created, 0 updated, 0 errors +``` + +Gateway response (verified via direct curl): + +```json +{ + "data": null, + "errors": [ + { + "message": "Unexpected error.", + "extensions": { "code": "INTERNAL_SERVER_ERROR" } + } + ] +} +``` + +### Root Cause + +`syncVideosLimited` sends **all** resolved video IDs in a single `SELECTED_VIDEOS_QUERY` +call. The JESUS Film collection resolves to 61 chapters. Each chapter has: + +- ~2200 variants (with downloads, muxVideo, subtitles) +- ~94 subtitles +- bibleCitations, images, keywords, children + +This query complexity exceeds the gateway's budget. The gateway returns `data: null` with +an `INTERNAL_SERVER_ERROR`. Apollo propagates `null` as `data`, so `data.videos` throws. + +Note: The `catch` block in `syncVideosLimited` (lines 767–771) does catch the error and +logs it correctly — but returns empty stats, making the failure silent in the final output +(`0c/0u/0d/0e` instead of a visible error count). + +### Workaround (Confirmed Working) + +Use explicit `videoIds` with a small batch instead of `collectionIds` for large collections: + +```json +POST /api/gateway-sync/trigger +{ + "videoIds": ["1_jf6101-0-0", "1_jf6102-0-0", "1_jf6103-0-0", "1_jf6104-0-0", "1_jf6105-0-0"], + "scope": ["videos", "video-variants"] +} +``` + +5 explicit IDs → gateway returned all 5 cleanly, `5c/0u/0d/0e`. + +### Fix (Applied) + +Extracted `fetchSelectedVideosBatched()` that splits IDs into chunks of +`SELECTED_VIDEOS_BATCH_SIZE` (currently 10). Each batch queries the gateway independently +with null-guard on the response. 
Failed batches log a warning and skip, allowing remaining +batches to succeed. + +**Files changed:** + +- `sync-videos.ts`: Added `fetchSelectedVideosBatched()`, `SELECTED_VIDEOS_BATCH_SIZE = 10` + +### Safe Batch Size Discovery + +| Batch size | Status | +| ---------- | --------------------------------- | +| 5 | ✅ Known safe | +| 10 | ✅ Current default | +| 15 | Untested | +| 20 | Untested | +| 61 | ❌ Triggers INTERNAL_SERVER_ERROR | + +--- + +## Local Testing Quick Reference + +```bash +# 1. Set env guard +echo "GATEWAY_SYNC_ENABLE_LIMITED_IMPORT=true" >> apps/cms/.env.local + +# 2. Start CMS +pnpm --filter cms dev + +# 3. Get API token (admin JWT doesn't work for content API) +ADMIN_TOKEN=$(curl -s -X POST http://localhost:1337/admin/login \ + -H "Content-Type: application/json" \ + -d '{"email":"admin@local.dev","password":"Admin1234!"}' | jq -r '.data.token') + +API_TOKEN=$(curl -s -X POST http://localhost:1337/admin/api-tokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -d '{"name":"test","type":"full-access","lifespan":null}' | jq -r '.data.accessKey') + +# 4. Dry-run to validate collection resolves +curl -s -X POST http://localhost:1337/api/gateway-sync/trigger \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_TOKEN" \ + -d '{"collectionIds":["your-collection-id"],"dryRun":true}' | jq . + +# 5. Import — use small explicit videoIds to avoid query complexity limit +curl -s -X POST http://localhost:1337/api/gateway-sync/trigger \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_TOKEN" \ + -d '{"videoIds":["id1","id2","id3","id4","id5"],"scope":["videos","video-variants"]}' | jq . + +# 6. Poll status +curl -s http://localhost:1337/api/gateway-sync/status \ + -H "Authorization: Bearer $API_TOKEN" | jq . +``` + +**Known safe collections for testing**: + +- `7_0-nfs01` — "How Did We Get Here? (Episode 1)" — 3 child episodes, 54 variants. Confirmed working. 
+- `1_jf-0-0` (JESUS Film, 61 chapters) — now works with batching but is very large (61 × ~2200 variants) + +--- + +## Bug 3: Strapi v5 Entity Validator Rejects Published Creates (FIXED) + +### Symptom + +``` +[gateway-sync] Failed to upsert variant 7_496-0-nfs0101: ValidationError: 1 relation(s) of +type api::video-edition.video-edition associated with this entity do not exist + at createEntry (entries.js:63:27) +``` + +All variant creates fail with this error even though the referenced video-edition records +exist in the database. + +### Root Cause + +Strapi v5 Document Service `create({status: "published"})` internally: + +1. Creates a **draft** row (passes entity validator with `isDraft: true`) +2. Deep-populates the draft's relation data from link tables +3. Calls `createEntry` again to create the **published** row + +Step 3 runs the entity validator with `isDraft: false`. The deep-populated relation data +from the draft's link tables fails validation during this internal publish step. The exact +mechanism involves how Strapi resolves relation IDs during the deep-populate/re-create cycle +— the IDs from the draft's link tables don't pass the published-status validation check. + +Key observation: draft creates with documentId strings for relations PASS validation (the +draft row and its link table entries are created successfully — confirmed by checking the +DB after failures). The error only occurs in the publish step's internal `createEntry`. + +### Fix (Applied) + +The final fix required more than "create as draft, then call `publish()` later." 
+ +Applied changes: + +- `upsertByGatewayId` now writes gateway-owned records as drafts for both **create and update** +- `publishDrafts()` now discovers document IDs that still have **no published row**, instead of + paging over raw `published_at IS NULL` rows +- `publishDrafts()` now tracks attempted document IDs so a permanently failing publish cannot + spin forever in a hot loop +- `gateway-sync.ts` now treats publish failures as sync failures instead of warning-only noise +- the importer now uses direct many-to-one document relations for `video` (`video: videoDocId`) + instead of mixing in `connect` arrays and raw join-table repair +- the raw `repairVideoChildRelationLinks()` workaround was removed + +**Why this works:** + +1. Draft-first create/update still avoids Strapi's internal published-create validator problem +2. Publishing by document ID works once relation payloads stay inside Document Service semantics +3. Strapi v5 keeps a draft row after publish, so the publish finder must select document IDs + that do not yet have any published row; otherwise large types stall after the first batch +4. 
Warm rerun behavior is still unresolved on Strapi `5.36.0`; this fix only verifies the clean import path + +### Verified Result + +Clean local import of collection `7_0-nfs01` on Strapi `5.36.0`: + +``` +languages: 2280c/0u/0d/0e +countries: 240c/0u/0d/0e +keywords: 6030c/0u/0d/0e +videos: 4c/0u/0d/0e +video-variants: 72c/0u/0d/0e +Published 2280 draft language records +Published 6598 draft country-language records +Published 6030 draft keyword records +Published 1 draft video-origin records +Published 16 draft video-edition records +Published 52 draft mux-video records +Published 4 draft video records +Published 39 draft video-subtitle records +Published 72 draft video-variant records +Published 4 draft bible-citation records +Published 9 draft video-study-question records +``` + +Known limitation: warm rerun of the same collection is still not reliable on Strapi `5.36.0`. +The remaining failures are in the rerun/update path, not the clean import path documented here. + +--- + +## Prevention Checklist + +- [x] Null-guard every `const { data } = await getGatewayClient().query(...)` before accessing fields +- [x] Batch video ID queries via `fetchSelectedVideosBatched()` with `SELECTED_VIDEOS_BATCH_SIZE = 10` +- [x] Create drafts then bulk-publish to avoid Strapi v5 entity validator relation errors +- [ ] Always dry-run (`dryRun: true`) against an unknown collection before a live import +- [ ] Detect `INTERNAL_SERVER_ERROR` with `data: null` separately from partial-data errors + +--- + +## See Also + +- `docs/solutions/cms/gateway-sync-local-testing.md` — full local setup runbook +- `docs/solutions/integration-issues/strapi-v5-manytone-relation-clearing.md` — relation clearing patterns used in sync-videos +- PR #514: feat(cms): add limited seed import to gateway-sync pipeline diff --git a/docs/solutions/cms/gateway-sync-local-testing.md b/docs/solutions/cms/gateway-sync-local-testing.md new file mode 100644 index 00000000..79523f1a --- /dev/null +++ 
b/docs/solutions/cms/gateway-sync-local-testing.md @@ -0,0 +1,260 @@ +--- +title: "Gateway Sync: Local Testing Runbook" +category: cms +date: 2026-03-23 +tags: [gateway-sync, strapi, api-token, limited-import, testing, runbook] +--- + +# Gateway Sync: Local Testing Runbook + +## Context + +`POST /api/gateway-sync/trigger` drives both full syncs and limited (collection-scoped) seed imports. +This runbook covers end-to-end local validation: environment setup, auth, dry-run, live import, and guard testing. + +Related code: `apps/cms/src/api/gateway-sync/` +Related PR: #514 — feat(cms): add limited seed import to gateway-sync pipeline + +--- + +## Critical Auth Gotcha + +The gateway-sync routes use the `admin::isAuthenticatedAdmin` policy under `/api/` (content-API scope). + +**Admin JWT tokens return 401 on these routes.** The content-API auth middleware does not process admin JWTs — it only processes API tokens. Always generate a full-access API token (Step 3) and use that. + +--- + +## Step 1 — Environment setup + +```bash +# Fetch all secrets from Doppler +cd apps/cms && pnpm fetch-secrets + +# Append the limited-import feature flag to .env +echo "GATEWAY_SYNC_ENABLE_LIMITED_IMPORT=true" >> apps/cms/.env + +# Start Strapi +pnpm --filter cms dev +# Wait for: "Strapi started successfully" +``` + +--- + +## Step 2 — Create admin user (blank DB only) + +Skip if your `.tmp/data.db` already has an admin.
+ +```bash +curl -s -X POST http://localhost:1337/admin/register-admin \ + -H "Content-Type: application/json" \ + -d '{"firstname":"Admin","lastname":"Local","email":"admin@local.dev","password":"Admin1234!"}' \ + | jq .data.user.email +``` + +--- + +## Step 3 — Generate a full-access API token + +```bash +ADMIN_TOKEN=$(curl -s -X POST http://localhost:1337/admin/login \ + -H "Content-Type: application/json" \ + -d '{"email":"admin@local.dev","password":"Admin1234!"}' \ + | jq -r '.data.token') + +API_TOKEN=$(curl -s -X POST http://localhost:1337/admin/api-tokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -d '{"name":"test-local","description":"local testing","type":"full-access","lifespan":null}' \ + | jq -r '.data.accessKey') + +echo "API_TOKEN=$API_TOKEN" +``` + +Export this for subsequent steps: + +```bash +export API_TOKEN="<paste-the-access-key-printed-above>" +``` + +--- + +## Step 4 — Find valid gateway collection IDs + +The gateway uses its own ID format — not slugs. Known working IDs: + +| Content | Gateway ID | +| ------------------------------ | ----------- | +| JESUS film | `1_jf-0-0` | +| Life of Jesus (Gospel of John) | `2_GOJ-0-0` | +| How Did We Get Here? Episode 1 | `7_0-nfs01` | + +To discover IDs from the live gateway: + +```bash +# Feature films +curl -s -X POST https://api-gateway.central.jesusfilm.org/ \ + -H "Content-Type: application/json" \ + -d '{"query":"{ videos(where: { labels: [featureFilm], published: true }, limit: 20) { id label } }"}' \ + | jq . + +# Collections +curl -s -X POST https://api-gateway.central.jesusfilm.org/ \ + -H "Content-Type: application/json" \ + -d '{"query":"{ videos(where: { labels: [collection], published: true }, limit: 20) { id label } }"}' \ + | jq .
+``` + +--- + +## Step 5 — Dry-run (resolves IDs, no import) + +```bash +curl -s -X POST http://localhost:1337/api/gateway-sync/trigger \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_TOKEN" \ + -d '{"collectionIds":["1_jf-0-0"],"dryRun":true}' | jq . +``` + +**Healthy response:** + +```json +{ + "dryRun": { + "isFullSync": false, + "requestedCollectionIds": ["1_jf-0-0"], + "collectionVideoIds": { "1_jf-0-0": ["1_jf6101-0-0", "...61 total"] }, + "resolvedVideoIds": ["1_jf6101-0-0", "..."], + "missingCollectionIds": [] + } +} +``` + +If `missingCollectionIds` is non-empty: the ID is wrong or not published — revisit Step 4. + +--- + +## Step 6 — Run the limited import + +Returns `202 Accepted` immediately (fire-and-forget background job). + +```bash +curl -s -X POST http://localhost:1337/api/gateway-sync/trigger \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_TOKEN" \ + -d '{"collectionIds":["1_jf-0-0"]}' | jq . +``` + +Watch the Strapi terminal for: + +``` +[gateway-sync] Limited import: 61 resolved video IDs from 1 collections +[gateway-sync] Limited video sync complete: X created, Y updated, 0 errors (soft-delete skipped) +[gateway-sync] Limited variant sync complete: X created, Y updated, 0 errors (soft-delete skipped) +``` + +--- + +## Step 7 — Poll sync status + +```bash +curl -s http://localhost:1337/api/gateway-sync/status \ + -H "Authorization: Bearer $API_TOKEN" | jq . +``` + +--- + +## Step 8 — Verify the guard (negative test) + +1. Remove `GATEWAY_SYNC_ENABLE_LIMITED_IMPORT=true` from `.env` +2. Restart Strapi +3. Repeat Step 6 + +Expected: error response refusing the limited import, not 202. 
+ +--- + +## Validation checklist + +| Check | What to verify | +| ----------------------------- | ------------------------------------------------ | +| Only selected videos imported | Video count ≈ `resolvedVideoIds.length` | +| No soft-deletes | Strapi logs show `soft-delete skipped` | +| Idempotent | Second run shows `updated`, not `created` | +| Variants linked | Imported videos have variants in Content Manager | +| Guard works | Without env flag, limited import is rejected | + +--- + +## Verified regression scenario: `7_0-nfs01` + +Use this exact scenario to validate the draft/publish fix for relation-heavy child content. + +### Clean run + +```bash +curl -s -X POST http://localhost:1337/api/gateway-sync/trigger \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_TOKEN" \ + -d '{"collectionIds":["7_0-nfs01"],"scope":["languages","countries","keywords","videos","video-variants"]}' | jq . +``` + +Expected terminal log summary: + +```text +[gateway-sync] languages: 2280c/0u/0d/0e +[gateway-sync] countries: 240c/0u/0d/0e +[gateway-sync] keywords: 6030c/0u/0d/0e +[gateway-sync] videos: 4c/0u/0d/0e +[gateway-sync] video-variants: 72c/0u/0d/0e +``` + +Expected publish summary includes: + +```text +Published 2280 draft language records +Published 6598 draft country-language records +Published 6030 draft keyword records +Published 4 draft video records +Published 39 draft video-subtitle records +Published 72 draft video-variant records +Published 4 draft bible-citation records +Published 9 draft video-study-question records +``` + +### Warm rerun + +Run the same command again without wiping the DB only if you are explicitly testing the still-open +rerun/update issue. + +Current status: warm rerun is not yet a required pass condition for this fix. The clean import path +above is verified; rerun behavior is still being investigated separately. 
+ +### SQLite verification + +```bash +sqlite3 apps/cms/.tmp/data.db " +select 'videos', count(*), sum(case when published_at is not null then 1 else 0 end) from videos +union all +select 'video_subtitles', count(*), sum(case when published_at is not null then 1 else 0 end) from video_subtitles +union all +select 'video_variants', count(*), sum(case when published_at is not null then 1 else 0 end) from video_variants +union all +select 'bible_citations', count(*), sum(case when published_at is not null then 1 else 0 end) from bible_citations +union all +select 'video_study_questions', count(*), sum(case when published_at is not null then 1 else 0 end) from video_study_questions; +" +``` + +Expected result after the clean run: + +```text +videos|8|4 +video_subtitles|78|39 +video_variants|144|72 +bible_citations|8|4 +video_study_questions|18|9 +``` + +The left value is total rows, the right value is published rows. The doubled total is normal +for Strapi v5 draft/publish because published documents keep their draft row.