diff --git a/packages/producer/src/services/renderOrchestrator.test.ts b/packages/producer/src/services/renderOrchestrator.test.ts index 03ee4e9817..d7a23d6877 100644 --- a/packages/producer/src/services/renderOrchestrator.test.ts +++ b/packages/producer/src/services/renderOrchestrator.test.ts @@ -2,11 +2,22 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; import { join, win32 } from "node:path"; import { tmpdir } from "node:os"; -import type { EngineConfig, ExtractedFrames } from "@hyperframes/engine"; +import type { CaptureOptions, EngineConfig, ExtractedFrames } from "@hyperframes/engine"; +import { executeParallelCapture, mergeWorkerFrames } from "@hyperframes/engine"; import type { CompiledComposition } from "./htmlCompiler.js"; +// Replace only the two engine functions the adaptive-retry loop uses to touch +// disk; everything else (distributeFrames, types, etc.) stays real so the loop +// runs for real against a temp framesDir. +vi.mock("@hyperframes/engine", async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, executeParallelCapture: vi.fn(), mergeWorkerFrames: vi.fn() }; +}); + import { buildMissingFrameRetryBatches, + captureAttemptMadeProgress, + executeDiskCaptureWithAdaptiveRetry, collectVideoMetadataHints, collectVideoReadinessSkipIds, extractStandaloneEntryFromIndex, @@ -95,6 +106,69 @@ describe("extractStandaloneEntryFromIndex", () => { }); }); +describe("captureAttemptMadeProgress", () => { + it("retries when the attempt captured at least one frame toward its target", () => { + // targeted 100 frames, 40 still missing -> 60 captured -> worth retrying the rest + expect(captureAttemptMadeProgress(100, 40)).toBe(true); + expect(captureAttemptMadeProgress(100, 99)).toBe(true); + }); + + it("bails when the attempt captured nothing (structurally broken composition)", () => { + // targeted 100 frames, 100 still missing -> zero progress -> don't burn another timeout cycle + expect(captureAttemptMadeProgress(100, 100)).toBe(false); + // defensive: never-greater-than guard, treat >= target as no progress + expect(captureAttemptMadeProgress(100, 120)).toBe(false); + }); +}); + +describe("executeDiskCaptureWithAdaptiveRetry — zero-progress bail (integration)", () => { + const makeLog = () => ({ error: vi.fn(), warn: vi.fn(), info: vi.fn(), debug: vi.fn() }); + + afterEach(() => { + vi.mocked(executeParallelCapture).mockReset(); + vi.mocked(mergeWorkerFrames).mockReset(); + }); + + it("runs exactly one attempt (no worker-halving retries) when an attempt captures zero frames", async () => { + // Capture writes nothing -> framesDir stays empty -> every frame still missing. + vi.mocked(executeParallelCapture).mockResolvedValue([]); + vi.mocked(mergeWorkerFrames).mockResolvedValue(undefined); + + const workDir = mkdtempSync(join(tmpdir(), "hf-retry-work-")); + const framesDir = mkdtempSync(join(tmpdir(), "hf-retry-frames-")); + const log = makeLog(); + try { + await expect( + executeDiskCaptureWithAdaptiveRetry({ + serverUrl: "http://localhost:0", + workDir, + framesDir, + totalFrames: 4, + initialWorkerCount: 4, + allowRetry: true, + frameExt: "jpg", + captureOptions: {} as CaptureOptions, + createBeforeCaptureHook: () => null, + cfg: {} as EngineConfig, + log, + dedupPerfs: [], + }), + ).rejects.toThrow(/4 frame\(s\) are missing/); + + // The gate under test: without it the loop would walk 4 -> 2 -> 1 workers + // (3 capture calls) before giving up. One call proves it bailed immediately. + expect(vi.mocked(executeParallelCapture)).toHaveBeenCalledTimes(1); + expect(log.warn).toHaveBeenCalledWith( + expect.stringContaining("no forward progress"), + expect.objectContaining({ attempt: 0, frameCount: 4, remainingCount: 4 }), + ); + } finally { + rmSync(workDir, { recursive: true, force: true }); + rmSync(framesDir, { recursive: true, force: true }); + } + }); +}); + describe("shouldUseStreamingEncode", () => { const streamingEnabledConfig = { enableStreamingEncode: true, diff --git a/packages/producer/src/services/renderOrchestrator.ts b/packages/producer/src/services/renderOrchestrator.ts index dc1867063c..77e5d0c03b 100644 --- a/packages/producer/src/services/renderOrchestrator.ts +++ b/packages/producer/src/services/renderOrchestrator.ts @@ -548,6 +548,23 @@ export function getNextRetryWorkerCount(currentWorkers: number): number { return Math.max(1, Math.floor(currentWorkers / 2)); } +/** + * A retry only pays off if the attempt that just finished captured at least one + * frame toward its target. When it captured nothing (frames still missing >= + * frames it set out to capture), the composition is structurally broken — a + * never-ready page, zero duration, or unparseable HTML — not a flaky worker. + * Re-running it at lower parallelism just burns another full readiness/protocol + * timeout per worker, turning a render that can never succeed into a long hang. + * A partially-captured attempt still retries, so genuine flaky-worker gaps are + * unaffected. + */ +export function captureAttemptMadeProgress( + attemptTargetFrameCount: number, + remainingFrameCount: number, +): boolean { + return remainingFrameCount < attemptTargetFrameCount; +} + export function isRecoverableParallelCaptureError(error: unknown): boolean { const message = normalizeErrorMessage(error); return ( @@ -673,11 +690,17 @@ export async function executeDiskCaptureWithAdaptiveRetry(options: { if (remaining.length === 0) { return attempts; } - if (!options.allowRetry || currentWorkers <= 1) { - throw new Error( - `[Render] Capture completed but ${countFrameRanges(remaining)} frame(s) are missing`, + const remainingCount = countFrameRanges(remaining); + const madeProgress = captureAttemptMadeProgress(frameCount, remainingCount); + if (!madeProgress) { + options.log.warn( + "[Render] Capture attempt made no forward progress; composition is likely structurally broken — not retrying.", + { attempt, frameCount, remainingCount, workers: currentWorkers }, ); } + if (!options.allowRetry || currentWorkers <= 1 || !madeProgress) { + throw new Error(`[Render] Capture completed but ${remainingCount} frame(s) are missing`); + } const nextWorkers = getNextRetryWorkerCount(currentWorkers); options.log.warn("[Render] Retrying missing captured frames with fewer workers.", { @@ -697,7 +720,20 @@ export async function executeDiskCaptureWithAdaptiveRetry(options: { if (remaining.length === 0) { return attempts; } - if (!options.allowRetry || currentWorkers <= 1 || !isRecoverableParallelCaptureError(error)) { + const remainingCount = countFrameRanges(remaining); + const madeProgress = captureAttemptMadeProgress(frameCount, remainingCount); + if (!madeProgress) { + options.log.warn( + "[Render] Capture attempt made no forward progress; composition is likely structurally broken — not retrying.", + { attempt, frameCount, remainingCount, workers: currentWorkers }, + ); + } + if ( + !options.allowRetry || + currentWorkers <= 1 || + !isRecoverableParallelCaptureError(error) || + !madeProgress + ) { throw error; }