From 56fbfc0db5e3c6a92aaf8f5a6b1bc47983248525 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Wed, 10 Jun 2026 20:08:41 +0200 Subject: [PATCH 1/7] Make pull-reprint re-runs perform a delta re-pull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-running `studio pull-reprint` on a completed pull previously printed "pulled successfully" and exited without contacting the remote, so remote edits never reached the local site. A re-run now resets the stage machine and re-executes every phase; reprint makes each phase incremental (files delta sync, full DB re-download + re-apply with the idempotent dump). Details: - Persist a hasCompletedOnce flag in pull.json so the non-empty site-directory guard is skipped on re-pulls (the directory holds the previous pull's output) and drop the cached preflight so connectivity and the secret-rotation retry path are re-verified. - Re-apply the stored admin credentials over the running site's admin API after the database refresh: db-apply rebuilds the DB from the remote dump, wiping the local admin user and the studio_admin_username option that /studio-auto-login depends on. The server only re-applies credentials at startup, and a re-pull deliberately skips restarting a running server. A connection failure on that request doubles as a health check — if the daemon's "online" view is stale, fall through to a server start. - Reconcile pull.json with the live site record in ensurePort() before the DB stages: if the site record was re-created with a different id/port, db-apply would rewrite the database URLs to a port nothing serves. - Drop /tmp/reprint/* mounts with missing host paths when loading the persisted start-options.json: reprint state files mounted for the remote-uploads proxy are transient, and mounting a missing path crashed server starts with ENOENT. 
- Fix shouldRestartFilesSyncIndex() comparing against the legacy 'files-sync' command name; reprint canonicalizes to 'files-pull' when saving state, so the check was dead code. Requires the reprint-side refresh support (WordPress/reprint#257) in the bundled reprint.phar. --- apps/cli/commands/pull-reprint.ts | 194 ++++++++++++++++--- apps/cli/commands/tests/pull-reprint.test.ts | 177 +++++++++++++++++ apps/cli/lib/pull/reprint-state.ts | 8 +- apps/cli/lib/wordpress-server-manager.ts | 27 +++ 4 files changed, 378 insertions(+), 28 deletions(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 1f793cb378..2939bd2c72 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -21,6 +21,11 @@ import { sortSites } from '@studio/common/lib/sort-sites'; import { PullReprintCommandLoggerAction as LoggerAction } from '@studio/common/logger-actions'; import { __, sprintf } from '@wordpress/i18n'; import chalk from 'chalk'; +import { + getSetAdminCredentialsRequestBody, + shouldSetAdminCredentials, + toUrlSearchParams, +} from 'cli/lib/admin-credentials'; import { enableReprintExporter, getWpComSites, @@ -35,7 +40,12 @@ import { unlockCliConfig, } from 'cli/lib/cli-config/core'; import { getSiteUrl, updateSiteAutoStart, updateSiteLatestCliPid } from 'cli/lib/cli-config/sites'; -import { connectToDaemon, disconnectFromDaemon, emitCliEvent } from 'cli/lib/daemon-client'; +import { + connectToDaemon, + disconnectFromDaemon, + emitCliEvent, + isProcessRunning, +} from 'cli/lib/daemon-client'; import { type ReprintProcessResult, runReprintCommandUntilComplete, @@ -54,7 +64,7 @@ import { import { getDefaultSitePath } from 'cli/lib/site-paths'; import { buildAutoLoginUrl } from 'cli/lib/site-utils'; import { getPrettyPath } from 'cli/lib/utils'; -import { startWordPressServer } from 'cli/lib/wordpress-server-manager'; +import { getProcessName, startWordPressServer } from 'cli/lib/wordpress-server-manager'; import { 
Logger, LoggerError } from 'cli/logger'; import { StudioArgv } from 'cli/types'; @@ -191,6 +201,14 @@ interface PullSessionMetadata { runtimeDirectory: string; runtimeBlueprintPath: string; stage: PullStage; + /** + * True once this pull has reached the 'completed' stage at least once. + * A re-run after that point is a delta re-pull: the stage machine is + * reset so every phase re-executes, and the non-empty site-directory + * guard is skipped (the directory legitimately holds the previous + * pull's output). + */ + hasCompletedOnce?: boolean; siteId?: string; port?: number; localUrl?: string; @@ -243,6 +261,13 @@ class PullError extends LoggerError { * on (see {@link recordCompletedStage}), so a crash anywhere in the * pipeline resumes at the next stage on re-run. `--abort` detours to * {@link abortPull} instead. + * + * Re-running after a pull reached 'completed' performs a delta + * re-pull: the stage machine resets to 'initialized' and every phase + * re-executes against the preserved reprint state directory. Reprint + * makes each phase incremental — files re-sync as a delta (re-index + + * diff), the database is fully re-downloaded and re-applied (the dump + * is idempotent, so edits, inserts, and deletes all propagate). */ export async function runCommand( userProvidedUrl?: string, @@ -280,10 +305,30 @@ export async function runCommand( ); const apiUrl = getReprintApiUrlForSite( studioMetadata.normalizedUrl ); + // A previously completed pull re-runs as a delta sync: reset the + // stage machine so every phase executes again. Reprint does the + // incremental work against the preserved state directory — files + // re-sync as a delta (re-index + diff), the database is fully + // re-downloaded and re-applied (the dump is idempotent, so remote + // edits, inserts, and deletes all land locally). 
+ const isRepull = studioMetadata.stage === 'completed'; + if ( isRepull ) { + studioMetadata.stage = 'initialized'; + studioMetadata.hasCompletedOnce = true; + savePullMetadata( studioMetadata ); + + // Re-verify connectivity (and give the secret-rotation retry path + // a chance to run) instead of trusting the cached preflight from + // the original pull, which may be days old. + fs.rmSync( path.join( studioMetadata.stateDirectory, 'preflight.json' ), { force: true } ); + } + // Refuse to clobber an existing non-empty site directory before the // flatten stage. Once flattened, the directory legitimately holds - // reprint's output; before that, anything there is user data. - if ( ! hasPullCompletedStage( studioMetadata, 'flattened' ) ) { + // reprint's output; before that, anything there is user data. On a + // re-pull (hasCompletedOnce) the directory holds the previous pull's + // output, so the guard doesn't apply. + if ( ! studioMetadata.hasCompletedOnce && ! hasPullCompletedStage( studioMetadata, 'flattened' ) ) { if ( ( await fsUtils.pathExists( studioMetadata.sitePath ) ) && ! ( await fsUtils.isEmptyDir( studioMetadata.sitePath ) ) @@ -326,13 +371,13 @@ export async function runCommand( fs.mkdirSync( studioMetadata.runtimeDirectory, { recursive: true } ); fs.mkdirSync( studioMetadata.sitePath, { recursive: true } ); - if ( studioMetadata.stage === 'completed' ) { - printCompletionMessage( studioMetadata ); - process.exit( 0 ); - } - const isResume = ! 
created || fs.readdirSync( studioMetadata.stateDirectory ).length > 0; - if ( isResume ) { + if ( isRepull ) { + console.log( + `Updating "${ studioMetadata.siteName }" from ${ studioMetadata.normalizedUrl } (delta sync)` + ); + console.log( '' ); + } else if ( isResume ) { console.log( `Resuming previous pull of "${ studioMetadata.siteName }" from ${ studioMetadata.normalizedUrl }` ); @@ -476,16 +521,39 @@ export async function runCommand( try { await connectToDaemon(); - const processDesc = await startWordPressServer( site, logger, runtimeStartOptions ); - logger.reportSuccess( __( 'WordPress server started' ) ); - if ( processDesc.status === 'online' ) { - await updateSiteLatestCliPid( site.id, processDesc.pid ); + // On a re-pull, the site's server is often already running. + // The synced files and database are picked up live (PHP + // opens them per request), so there's nothing to restart — + // but db-apply rebuilt the database from the remote dump, + // wiping the local admin user and the studio_admin_username + // option that /studio-auto-login depends on. A server start + // re-applies the credentials; when we skip the restart we + // must re-apply them over the running site's admin API. + // A connection failure means the daemon's view is stale and + // the server is actually down, so fall through to a start + // (which re-applies the credentials itself). + const runningProcess = await isProcessRunning( getProcessName( site.id ) ); + const credentialsResult = runningProcess + ? 
await reapplyAdminCredentials( site ) + : 'unreachable'; + if ( runningProcess && credentialsResult !== 'unreachable' ) { + logger.reportSuccess( __( 'WordPress server already running' ) ); + studioMetadata.localUrl = getSiteUrl( site ); + savePullMetadata( studioMetadata ); + recordCompletedStage( studioMetadata, 'site-started' ); + } else { + const processDesc = await startWordPressServer( site, logger, runtimeStartOptions ); + logger.reportSuccess( __( 'WordPress server started' ) ); + + if ( processDesc.status === 'online' ) { + await updateSiteLatestCliPid( site.id, processDesc.pid ); + } + await updateSiteAutoStart( site.id, true ); + studioMetadata.localUrl = getSiteUrl( site ); + savePullMetadata( studioMetadata ); + recordCompletedStage( studioMetadata, 'site-started' ); } - await updateSiteAutoStart( site.id, true ); - studioMetadata.localUrl = getSiteUrl( site ); - savePullMetadata( studioMetadata ); - recordCompletedStage( studioMetadata, 'site-started' ); } catch ( serverError ) { throw new LoggerError( __( 'Failed to start the WordPress server for the pulled site.' ), @@ -1302,25 +1370,45 @@ function recordCompletedStage( metadata: PullSessionMetadata, stage: PullStage ) * rewritten URLs) as the interrupted run. */ async function ensurePort( metadata: PullSessionMetadata ): Promise< void > { - if ( metadata.port && metadata.localUrl ) { - return; - } - const cliConfig = await readCliConfig(); - for ( const site of cliConfig.sites ) { - portFinder.addUnavailablePort( site.port ); - } + // When a Studio site record already exists for this pull, adopt its + // identity even if the metadata already carries a port — the record + // can change between runs (e.g. the site was deleted and re-created + // and got a different id/port). db-apply rewrites the database URLs + // to metadata.localUrl, so a stale port here would rewrite the site + // to a URL nothing serves. 
const existingSite = cliConfig.sites.find( ( site ) => ( metadata.siteId && site.id === metadata.siteId ) || fsUtils.arePathsEqual( site.path, metadata.sitePath ) || site.technicalSiteDirectory === metadata.technicalSiteDirectory ); + if ( existingSite ) { + if ( + metadata.siteId !== existingSite.id || + metadata.port !== existingSite.port || + metadata.localUrl !== getSiteUrl( existingSite ) + ) { + metadata.siteId = existingSite.id; + metadata.port = existingSite.port; + metadata.localUrl = getSiteUrl( existingSite ); + savePullMetadata( metadata ); + } + return; + } + + if ( metadata.port && metadata.localUrl ) { + return; + } - const port = existingSite?.port ?? ( await portFinder.getOpenPort() ); + for ( const site of cliConfig.sites ) { + portFinder.addUnavailablePort( site.port ); + } + + const port = await portFinder.getOpenPort(); metadata.port = port; - metadata.localUrl = existingSite ? getSiteUrl( existingSite ) : `http://localhost:${ port }`; + metadata.localUrl = `http://localhost:${ port }`; savePullMetadata( metadata ); } @@ -1364,6 +1452,58 @@ async function findExistingSite( metadata: PullSessionMetadata ): Promise< SiteD ); } +/** + * Re-applies the site's stored admin credentials over the running + * site's admin API (`POST /?studio-admin-api`) — the same endpoint + * both server runtimes hit on startup. + * + * Needed after a re-pull's db-apply: the remote dump contains neither + * the local admin user nor the `studio_admin_username` option, so + * rebuilding the database from it breaks `/studio-auto-login` until + * the credentials are applied again. 
+ * + * Returns: + * - 'applied' credentials re-applied on the running server + * - 'skipped' the site record has no credentials to apply + * - 'unreachable' the server didn't answer — the caller should + * treat the site as not running and start it + */ +export async function reapplyAdminCredentials( + site: SiteData +): Promise< 'applied' | 'skipped' | 'unreachable' > { + const credentials = { + adminUsername: site.adminUsername, + adminPassword: site.adminPassword, + adminEmail: site.adminEmail, + }; + if ( ! shouldSetAdminCredentials( credentials ) ) { + return 'skipped'; + } + + let response: Response; + try { + response = await fetch( new URL( '/?studio-admin-api', getSiteUrl( site ) ), { + method: 'POST', + body: toUrlSearchParams( getSetAdminCredentialsRequestBody( credentials ) ), + signal: AbortSignal.timeout( 15_000 ), + } ); + } catch { + return 'unreachable'; + } + + if ( ! response.ok ) { + throw new LoggerError( + sprintf( + // translators: %d: HTTP status code. + __( 'Failed to re-apply the admin credentials after the database refresh (HTTP %d).' 
), + response.status + ) + ); + } + + return 'applied'; +} + function printSiteUrls( localUrl: string ): void { console.log( __( 'Site URL: ' ), buildAutoLoginUrl( localUrl ) ); console.log( diff --git a/apps/cli/commands/tests/pull-reprint.test.ts b/apps/cli/commands/tests/pull-reprint.test.ts index 4083da2b61..7b83f6749f 100644 --- a/apps/cli/commands/tests/pull-reprint.test.ts +++ b/apps/cli/commands/tests/pull-reprint.test.ts @@ -523,3 +523,180 @@ describe( 'CLI: studio pull-reprint confirmation before creating a site', () => expect( fs.existsSync( technicalSiteDirectory ) ).toBe( true ); } ); } ); + +describe( 'CLI: studio pull-reprint delta re-pull of a completed pull', () => { + let fakeHome: string; + + afterEach( () => { + vi.restoreAllMocks(); + vi.resetModules(); + if ( fakeHome ) { + fs.rmSync( fakeHome, { recursive: true, force: true } ); + } + } ); + + /** + * Same throwaway-home harness as the confirmation tests: anchors + * PULLS_ROOT and the Studio sites root to a temp directory so the + * real runCommand never touches the developer's machine. 
+ */ + async function loadRunCommandWithFakeHome() { + fakeHome = fs.mkdtempSync( path.join( os.tmpdir(), 'studio-pull-repull-home-' ) ); + + vi.resetModules(); + vi.doMock( 'os', async () => { + const actual = await vi.importActual< typeof import('os') >( 'os' ); + return { + ...actual, + default: { ...actual, homedir: () => fakeHome }, + homedir: () => fakeHome, + }; + } ); + + const mod = await import( '../pull-reprint' ); + return mod; + } + + it( 'resets a completed pull for a delta re-run instead of exiting early', async () => { + const { runCommand, getPrivateDirNameForImportSession, normalizeSiteUrl } = + await loadRunCommandWithFakeHome(); + + const normalizedUrl = normalizeSiteUrl( 'https://example.com' ); + const pullKey = getPrivateDirNameForImportSession( normalizedUrl, 'My Completed Site' ); + const pullsRoot = path.join( fakeHome, '.studio', 'pulls' ); + const technicalSiteDirectory = path.join( pullsRoot, pullKey ); + const stateDirectory = path.join( technicalSiteDirectory, 'state' ); + const sitePath = path.join( fakeHome, 'Studio', 'My-Completed-Site' ); + + // Seed a completed pull whose site directory is non-empty (it holds + // the previous pull's output) and whose preflight response is cached. + fs.mkdirSync( stateDirectory, { recursive: true } ); + fs.mkdirSync( sitePath, { recursive: true } ); + fs.writeFileSync( path.join( sitePath, 'wp-config.php' ), ' undefined ); + vi.spyOn( console, 'error' ).mockImplementation( () => undefined ); + + await expect( + runCommand( 'https://example.com', 'hmac-secret', 'My Completed Site', false, false, false ) + ).rejects.toThrow(); + + // The old behavior exited early without ever invoking reprint; the + // re-pull must re-enter the pipeline (preflight is its first call). + expect( reprintSpy ).toHaveBeenCalled(); + expect( reprintSpy.mock.calls[ 0 ][ 2 ][ 0 ] ).toBe( 'preflight' ); + + // The stage machine was reset and the re-pull marker persisted. 
+ const metadata = JSON.parse( + fs.readFileSync( path.join( technicalSiteDirectory, 'pull.json' ), 'utf-8' ) + ); + expect( metadata.stage ).toBe( 'initialized' ); + expect( metadata.hasCompletedOnce ).toBe( true ); + + // The cached preflight was dropped so connectivity is re-verified. + expect( fs.existsSync( path.join( stateDirectory, 'preflight.json' ) ) ).toBe( false ); + + // The non-empty site directory did not trip the clobber guard. + expect( fs.existsSync( path.join( sitePath, 'wp-config.php' ) ) ).toBe( true ); + + // The user sees update messaging, not a no-op success. + expect( logSpy.mock.calls.flat().join( '\n' ) ).toContain( 'Updating "My Completed Site"' ); + } ); +} ); + +describe( 'CLI: studio pull-reprint admin credentials re-apply', () => { + afterEach( () => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + } ); + + function makeSite( overrides: Record< string, unknown > = {} ) { + return { + id: 'site-1', + name: 'Test Site', + path: '/tmp/test-site', + port: 8901, + phpVersion: '8.2', + running: true, + ...overrides, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any; + } + + it( 'skips when the site record has no admin credentials', async () => { + const { reapplyAdminCredentials } = await import( '../pull-reprint' ); + const fetchSpy = vi.fn(); + vi.stubGlobal( 'fetch', fetchSpy ); + + await expect( reapplyAdminCredentials( makeSite() ) ).resolves.toBe( 'skipped' ); + expect( fetchSpy ).not.toHaveBeenCalled(); + } ); + + it( 'posts the stored credentials to the running site admin API', async () => { + const { reapplyAdminCredentials } = await import( '../pull-reprint' ); + const { encodePassword } = await import( '@studio/common/lib/passwords' ); + const fetchSpy = vi.fn().mockResolvedValue( { ok: true, status: 200 } ); + vi.stubGlobal( 'fetch', fetchSpy ); + + const site = makeSite( { adminPassword: encodePassword( 'secret-pw' ) } ); + await expect( reapplyAdminCredentials( site ) ).resolves.toBe( 'applied' ); 
+ + expect( fetchSpy ).toHaveBeenCalledTimes( 1 ); + const [ url, init ] = fetchSpy.mock.calls[ 0 ]; + expect( String( url ) ).toContain( 'studio-admin-api' ); + expect( init.method ).toBe( 'POST' ); + const params = init.body as URLSearchParams; + expect( params.get( 'action' ) ).toBe( 'set_admin_password' ); + expect( params.get( 'password' ) ).toBe( 'secret-pw' ); + } ); + + it( 'reports an unreachable server instead of throwing on connection failure', async () => { + const { reapplyAdminCredentials } = await import( '../pull-reprint' ); + const { encodePassword } = await import( '@studio/common/lib/passwords' ); + vi.stubGlobal( 'fetch', vi.fn().mockRejectedValue( new Error( 'ECONNREFUSED' ) ) ); + + const site = makeSite( { adminPassword: encodePassword( 'secret-pw' ) } ); + await expect( reapplyAdminCredentials( site ) ).resolves.toBe( 'unreachable' ); + } ); + + it( 'throws when the admin API answers with an error status', async () => { + const { reapplyAdminCredentials } = await import( '../pull-reprint' ); + const { encodePassword } = await import( '@studio/common/lib/passwords' ); + vi.stubGlobal( 'fetch', vi.fn().mockResolvedValue( { ok: false, status: 400 } ) ); + + const site = makeSite( { adminPassword: encodePassword( 'secret-pw' ) } ); + await expect( reapplyAdminCredentials( site ) ).rejects.toThrow( + 'Failed to re-apply the admin credentials' + ); + } ); +} ); diff --git a/apps/cli/lib/pull/reprint-state.ts b/apps/cli/lib/pull/reprint-state.ts index 274f6c6bc4..62de7afb38 100644 --- a/apps/cli/lib/pull/reprint-state.ts +++ b/apps/cli/lib/pull/reprint-state.ts @@ -92,7 +92,13 @@ export function shouldRestartFilesSyncIndex( stateDirectory: string ): boolean { return false; } - if ( state.command !== 'files-sync' || state.status === 'complete' ) { + // reprint canonicalizes the legacy 'files-sync' command name to + // 'files-pull' when it saves state; accept both so this check keeps + // working across reprint versions. 
+ if ( + ( state.command !== 'files-sync' && state.command !== 'files-pull' ) || + state.status === 'complete' + ) { return false; } diff --git a/apps/cli/lib/wordpress-server-manager.ts b/apps/cli/lib/wordpress-server-manager.ts index ddb74d6d4d..28df342389 100644 --- a/apps/cli/lib/wordpress-server-manager.ts +++ b/apps/cli/lib/wordpress-server-manager.ts @@ -200,6 +200,32 @@ async function ensurePhpBinaryAvailableIfNeeded( } } +/** + * Drops mounts of reprint state files whose host paths no longer exist. + * + * reprint's apply-runtime mounts importer state files (under /tmp/reprint + * in the VFS) for the temporary remote-uploads proxy. Those files are + * transient — a later sync can empty or remove them — so a persisted + * start-options.json can reference paths that are gone, and mounting a + * missing path crashes the server start with ENOENT. Critical site mounts + * (core, wp-content, wp-config.php) are intentionally NOT filtered: if + * those are missing, failing loudly is correct. + */ +function dropStaleReprintStateMounts( options: StartServerOptions ): StartServerOptions { + const isStale = ( mount: { hostPath: string; vfsPath: string } ) => + mount.vfsPath.startsWith( '/tmp/reprint/' ) && ! fs.existsSync( mount.hostPath ); + + return { + ...options, + ...( options.mountsBeforeInstall && { + mountsBeforeInstall: options.mountsBeforeInstall.filter( ( m ) => ! isStale( m ) ), + } ), + ...( options.mounts && { + mounts: options.mounts.filter( ( m ) => ! 
isStale( m ) ), + } ), + }; +} + export async function startWordPressServer( site: SiteData, logger: Logger< string >, @@ -216,6 +242,7 @@ export async function startWordPressServer( ); if ( fs.existsSync( optionsPath ) ) { options = JSON.parse( fs.readFileSync( optionsPath, 'utf-8' ) ) as StartServerOptions; + options = dropStaleReprintStateMounts( options ); } } From 19388cdfaa282906273768542041c8613ab6a043 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Wed, 10 Jun 2026 20:20:02 +0200 Subject: [PATCH 2/7] Fix prettier formatting of the re-pull directory guard --- apps/cli/commands/pull-reprint.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 2939bd2c72..cbc9bbdd57 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -328,7 +328,10 @@ export async function runCommand( // reprint's output; before that, anything there is user data. On a // re-pull (hasCompletedOnce) the directory holds the previous pull's // output, so the guard doesn't apply. - if ( ! studioMetadata.hasCompletedOnce && ! hasPullCompletedStage( studioMetadata, 'flattened' ) ) { + if ( + ! studioMetadata.hasCompletedOnce && + ! hasPullCompletedStage( studioMetadata, 'flattened' ) + ) { if ( ( await fsUtils.pathExists( studioMetadata.sitePath ) ) && ! 
( await fsUtils.isEmptyDir( studioMetadata.sitePath ) ) From 9bd3e1f8cd6f886d01bef07e8a1cd3339bca9c6d Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Wed, 17 Jun 2026 12:37:33 +0200 Subject: [PATCH 3/7] Re-enable reprint exporter on preflight retry so cached-secret re-pulls reconnect --- apps/cli/commands/pull-reprint.ts | 19 ++- apps/cli/commands/tests/pull-reprint.test.ts | 118 ++++++++++++++++++- 2 files changed, 133 insertions(+), 4 deletions(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index e6dd556686..889ae31f11 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -402,9 +402,14 @@ export async function runCommand( try { preflight = await runPreflight( studioMetadata, apiUrl, secret, verbose ); } catch ( preflightError ) { - // The stored secret may have expired. Resolve the WP.com site - // (loading the site list only now, if we haven't already) and - // rotate the secret before retrying the preflight. + // Preflight against ?reprint-api can fail for two reasons we can + // recover from on WP.com: the stored secret expired, or the + // wpcomsh exporter gate (`reprint_exporter_enabled`, a 60-minute + // sliding window) closed since the last run. A cached-secret + // resume skips the happy-path enable above, so this is the common + // case on a delta re-pull. Resolve the WP.com site (loading the + // site list only now, if we haven't already), then both rotate the + // secret AND re-enable the exporter before retrying. 
if ( sourceSite.wpComSite && sourceSite.wpComToken ) { secret = await rotateReprintSecret( sourceSite.wpComSite.id, @@ -431,6 +436,14 @@ export async function runCommand( sourceSite.wpComToken = token; secret = await rotateReprintSecret( matched.id, token.accessToken ); } + // Rotating the secret does not bump `reprint_exporter_enabled`, so + // re-open the gate explicitly; otherwise the retry hits the same + // closed window and ?reprint-api falls through to an HTML page. + await enableReprintExporter( + sourceSite.wpComSite.id, + sourceSite.wpComToken.accessToken, + verbose + ); preflight = await runPreflight( studioMetadata, apiUrl, secret, verbose ); } studioMetadata.remoteSiteUrl = preflight.siteurl || studioMetadata.normalizedUrl; diff --git a/apps/cli/commands/tests/pull-reprint.test.ts b/apps/cli/commands/tests/pull-reprint.test.ts index 08998837f5..6bc14d6d91 100644 --- a/apps/cli/commands/tests/pull-reprint.test.ts +++ b/apps/cli/commands/tests/pull-reprint.test.ts @@ -3,7 +3,7 @@ import os from 'node:os'; import path from 'node:path'; import { readAuthToken } from '@studio/common/lib/shared-config'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { rotateReprintSecret } from 'cli/lib/api'; +import { enableReprintExporter, rotateReprintSecret } from 'cli/lib/api'; import * as migrationClient from 'cli/lib/pull/migration-client'; import { shouldRestartFilesSyncIndex } from 'cli/lib/pull/reprint-state'; import { fetchSyncableSites } from 'cli/lib/sync-api'; @@ -27,6 +27,7 @@ vi.mock( '@studio/common/lib/shared-config', async ( importOriginal ) => ( { vi.mock( 'cli/lib/api', async ( importOriginal ) => ( { ...( await importOriginal< typeof import('cli/lib/api') >() ), rotateReprintSecret: vi.fn(), + enableReprintExporter: vi.fn(), } ) ); vi.mock( 'cli/lib/sync-api', async ( importOriginal ) => ( { ...( await importOriginal< typeof import('cli/lib/sync-api') >() ), @@ -824,6 +825,121 @@ describe( 'CLI: studio pull-reprint 
delta re-pull of a completed pull', () => { } ); } ); +describe( 'CLI: studio pull-reprint preflight retry re-enables the exporter', () => { + let fakeHome: string; + + const token = { + accessToken: 'access-token', + id: 1, + email: 'user@example.com', + displayName: 'User', + expiresIn: 1209600, + expirationTime: Date.now() + 1209600000, + }; + + const sites: SyncSite[] = [ + { + id: 22, + name: 'Example', + url: 'https://example.com', + localSiteId: '', + isStaging: false, + isPressable: false, + syncSupport: 'syncable', + lastPullTimestamp: null, + lastPushTimestamp: null, + }, + ]; + + afterEach( () => { + vi.restoreAllMocks(); + vi.resetModules(); + if ( fakeHome ) { + fs.rmSync( fakeHome, { recursive: true, force: true } ); + } + } ); + + async function loadRunCommandWithFakeHome() { + fakeHome = fs.mkdtempSync( path.join( os.tmpdir(), 'studio-pull-retry-home-' ) ); + + vi.resetModules(); + vi.doMock( 'os', async () => { + const actual = await vi.importActual< typeof import('os') >( 'os' ); + return { + ...actual, + default: { ...actual, homedir: () => fakeHome }, + homedir: () => fakeHome, + }; + } ); + + const mod = await import( '../pull-reprint' ); + return mod; + } + + it( 're-enables the exporter (not just rotates the secret) before retrying a failed preflight', async () => { + const { runCommand, getPrivateDirNameForImportSession, normalizeSiteUrl } = + await loadRunCommandWithFakeHome(); + + // Seed a previously-completed pull with a cached secret but no + // wpComSite/wpComToken — exactly the delta-re-pull shape that makes + // resolveSourceSite short-circuit on the cached secret and skip the + // happy-path exporter enable. 
+ const normalizedUrl = normalizeSiteUrl( 'https://example.com' ); + const pullKey = getPrivateDirNameForImportSession( normalizedUrl, 'My Retry Site' ); + const pullsRoot = path.join( fakeHome, '.studio', 'pulls' ); + const technicalSiteDirectory = path.join( pullsRoot, pullKey ); + const stateDirectory = path.join( technicalSiteDirectory, 'state' ); + const sitePath = path.join( fakeHome, 'Studio', 'My-Retry-Site' ); + + fs.mkdirSync( stateDirectory, { recursive: true } ); + fs.mkdirSync( sitePath, { recursive: true } ); + fs.writeFileSync( path.join( sitePath, 'wp-config.php' ), ' undefined ); + vi.spyOn( console, 'error' ).mockImplementation( () => undefined ); + + await expect( + runCommand( 'https://example.com', undefined, 'My Retry Site', false, false, true ) + ).rejects.toThrow(); + + // The fix: the retry resolves the WP.com site, rotates the secret, AND + // re-opens the exporter gate. Without the enable call the retry would + // hit the same closed window and fail identically. + expect( rotateReprintSecret ).toHaveBeenCalledWith( 22, token.accessToken ); + expect( enableReprintExporter ).toHaveBeenCalledWith( 22, token.accessToken, false ); + } ); +} ); + describe( 'CLI: studio pull-reprint admin credentials re-apply', () => { afterEach( () => { vi.restoreAllMocks(); From 7ddc3c1052b8b932f50f4624099edc032b1d56c6 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Thu, 18 Jun 2026 11:47:42 +0200 Subject: [PATCH 4/7] Clear completed sub-command state with --abort before a re-pull A delta re-pull re-runs files-sync/db-sync/db-apply against a state dir where the previous pull left them "complete". Re-running as-is either throws (files-sync rejects a --filter change while a sync still looks in progress) or silently no-ops, so the database never refreshes. Issue `<command> --abort` before each on a re-pull to reset the sub-command state so every phase re-runs: files-sync keeps the local index and deltas, db-sync re-downloads, db-apply re-applies. 
Gated on hasCompletedOnce (the persisted "this pull finished once" flag) rather than the local isRepull, because the first re-pull attempt already reset the stage to "initialized"; a retry must still abort. Each command is aborted only while its stage is still pending, so a mid-pull resume never wipes a phase that already re-ran. Runs after preflight, which files-sync/db-sync --abort require. This removes the dependency on reprint's sub-commands auto-refreshing, so the re-pull works against a stock reprint build. --- apps/cli/commands/pull-reprint.ts | 81 +++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 889ae31f11..3d12bbc030 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -451,6 +451,21 @@ export async function runCommand( studioMetadata.secret = secret; savePullMetadata( studioMetadata ); + // On a delta re-pull, the reprint sub-command state in the shared + // state directory still reads "complete" from the previous pull, so a + // plain re-run makes files-sync refuse the run (a --filter change + // throws "Cannot change --filter … while a sync is in progress") and + // lets db-sync/db-apply skip their work. Clear that state with --abort + // so each phase re-runs as a delta. Gated on hasCompletedOnce (the + // persisted "this pull finished once" flag) rather than the local + // isRepull, because the first re-pull attempt already reset stage to + // 'initialized'; a later retry must still abort. The helper itself + // only aborts stages still pending, so a mid-pull resume is safe. + // Placed after preflight because files-sync/db-sync --abort require it. + if ( studioMetadata.hasCompletedOnce ) { + await clearCompletedSubcommandState( studioMetadata, apiUrl, secret, verbose ); + } + if ( ! 
hasPullCompletedStage( studioMetadata, 'essential-files-complete' ) ) { await downloadEssentialSiteFiles( studioMetadata, apiUrl, secret, verbose ); } @@ -876,6 +891,72 @@ export async function applyDownloadedDatabase( recordCompletedStage( metadata, 'db-applied' ); } +/** + * Clear leftover "complete" reprint sub-command state before a delta + * re-pull re-runs each phase. + * + * After a finished pull, files-sync / db-sync / db-apply all report + * "complete" in the shared state directory. A plain re-run then either + * throws (files-sync rejects a `--filter` change while a sync still + * looks in progress: "Cannot change --filter … while a sync is in + * progress") or silently no-ops. `--abort` resets each command without + * losing useful work: files-sync keeps the local file index (so the + * re-run is a delta, not a full re-download), db-sync deletes db.sql + * (so the dump is re-fetched), and db-apply clears its apply tracking + * without touching the target tables (the next apply re-creates them; + * the dump is idempotent). + * + * files-sync and db-sync `--abort` require a prior preflight, so this + * must run after {@link runPreflight}. Each command is aborted only while + * its stage is still pending in the current re-pull, so re-running this on + * a resumed re-pull never wipes a phase that already re-ran. + */ +async function clearCompletedSubcommandState( + metadata: PullSessionMetadata, + apiUrl: string, + secret: string, + verbose: boolean +): Promise< void > { + const stateArgs = [ + `--secret=${ secret }`, + `--state-dir=${ metadata.stateDirectory }`, + `--fs-root=${ metadata.rawDirectory }`, + ]; + // Only abort a command whose stage hasn't completed yet in this re-pull + // cycle. Once a phase has re-run, its output must be preserved: aborting + // files-sync would delete the freshly written skipped-files list the tail + // needs, and aborting db-sync would delete the db.sql that db-apply reads. 
+ const aborts: Array< { stage: PullStage; args: string[] } > = [ + // files-sync --abort keeps the local index, so the re-run deltas. + { + stage: 'essential-files-complete', + args: buildFilesSyncArgs( metadata, apiUrl, secret, [ '--abort' ] ), + }, + // --sql-output=file so the abort also removes the stale db.sql. + { + stage: 'db-downloaded', + args: [ 'db-sync', apiUrl, '--abort', '--sql-output=file', ...stateArgs ], + }, + // Clears apply tracking only; target tables are re-created on apply. + { stage: 'db-applied', args: [ 'db-apply', apiUrl, '--abort', ...stateArgs ] }, + ]; + const pending = aborts.filter( ( { stage } ) => ! hasPullCompletedStage( metadata, stage ) ); + if ( pending.length === 0 ) { + return; + } + logger.reportStart( LoggerAction.ABORT_IMPORT, __( 'Preparing delta re-pull…' ) ); + for ( const { args } of pending ) { + await runReprintCommandUntilComplete( + metadata.stateDirectory, + metadata.rawDirectory, + args, + undefined, + { verboseCommands: verbose } + ); + } + logger.reportSuccess( __( 'Previous pull state cleared' ) ); +} + /** * Fetch the minimum set of files needed to produce a usable flattened * site directory: wp-config.php, wp-includes, active plugins/themes, From 46f10596bcc6a7e12067ffbcb84487c51f21cc2a Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Thu, 18 Jun 2026 13:30:22 +0200 Subject: [PATCH 5/7] Refresh latestCliPid and autoStart when the server is already running On a re-pull where the daemon process is still up, the site-started "already running" branch skipped updateSiteLatestCliPid and updateSiteAutoStart; both only ran on the start branch. The running-status check requires site.latestCliPid to equal the live process pid, so a stale or missing pid (e.g. after ensurePort adopts a re-created site record) made the site report as stopped. Mirror the start branch and `studio site start`'s already-running path: refresh latestCliPid when online and keep autoStart enabled. 
--- apps/cli/commands/pull-reprint.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 3d12bbc030..11f24fcde3 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -563,6 +563,14 @@ export async function runCommand( : 'unreachable'; if ( runningProcess && credentialsResult !== 'unreachable' ) { logger.reportSuccess( __( 'WordPress server already running' ) ); + // Mirror the start branch (and `studio site start`'s + // already-running path): refresh latestCliPid so + // running-status checks match the live process, and keep + // autoStart enabled as every pull has. + if ( runningProcess.status === 'online' ) { + await updateSiteLatestCliPid( site.id, runningProcess.pid ); + } + await updateSiteAutoStart( site.id, true ); studioMetadata.localUrl = getSiteUrl( site ); savePullMetadata( studioMetadata ); recordCompletedStage( studioMetadata, 'site-started' ); From 0b27bdff5d7976fef1ffcce5d61d68d65f6a3748 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Thu, 18 Jun 2026 18:21:01 +0200 Subject: [PATCH 6/7] Drive pull-reprint with a single `reprint pull` invocation Replace the per-sub-command orchestration (downloadEssentialSiteFiles, refreshFlattenedSiteDirectory, downloadRemoteDatabase, applyDownloadedDatabase, generateRuntimeConfiguration) and the clearCompletedSubcommandState/--abort delta-reset with one `reprint pull` call. reprint owns the pipeline ordering (files-pull -> db-pull -> db-apply -> flat-docroot -> apply-runtime) and resets its own state for a delta re-pull via prepare_repull(), so the Studio-side --abort wiring and per-phase stage gating go away. - runFullPull() issues the single pull with the same sqlite geometry the old db-apply used (target sqlite under the raw content dir) plus --flatten-to, --runtime=playground-cli, --start-runtime=none and --output-dir, mounting the site + runtime dirs up front. 
- ensurePort moves before the pull so --new-site-url is available. - Collapse the stage machine from 9 stages to 5 (initialized -> pulled -> site-registered -> site-started -> completed). - Bump the PHP-WASM memory_limit to 1024M: the single long-lived fork holds the file-index high-water-mark across phases. Requires reprint's flatten_to->flat_document_root bridge and the re-pull filter-guard fix (WordPress/reprint#268). --- apps/cli/commands/pull-reprint.ts | 362 +++---------------- apps/cli/commands/tests/pull-reprint.test.ts | 91 +++-- apps/cli/reprint-child.ts | 6 +- 3 files changed, 115 insertions(+), 344 deletions(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index 11f24fcde3..f11cd69001 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -50,7 +50,6 @@ import { getReprintStatePath, hasSkippedFiles, readReprintState, - shouldRestartFilesSyncIndex, } from 'cli/lib/pull/reprint-state'; import { ensureImportedSiteSqliteReady, @@ -142,11 +141,7 @@ const PULLS_ROOT = path.join( os.homedir(), '.studio', 'pulls' ); const pullStageOrder = [ 'initialized', - 'essential-files-complete', - 'flattened', - 'db-downloaded', - 'db-applied', - 'runtime-generated', + 'pulled', 'site-registered', 'site-started', 'completed', @@ -238,27 +233,24 @@ class PullError extends LoggerError { * resolvePullSource → * resolvePullMetadata → * runPreflight (with secret-rotate retry on WP.com) → - * downloadEssentialSiteFiles → - * refreshFlattenedSiteDirectory → - * downloadRemoteDatabase → * ensurePort → - * applyDownloadedDatabase → - * generateRuntimeConfiguration → + * runFullPull (one `reprint pull`: files-pull → db-pull → db-apply → + * flat-docroot → apply-runtime) → * registerSite → * startWordPressServer → * downloadSkippedFiles. 
* - * Every stage persists its completion to `pull.json` before moving - * on (see {@link recordCompletedStage}), so a crash anywhere in the - * pipeline resumes at the next stage on re-run. `--abort` detours to - * {@link abortPull} instead. + * Each Studio stage persists to `pull.json` (see {@link + * recordCompletedStage}), so a crash resumes at the next stage; within + * the pull, reprint resumes its own pipeline from its last completed + * sub-stage. `--abort` detours to {@link abortPull} instead. * * Re-running after a pull reached 'completed' performs a delta - * re-pull: the stage machine resets to 'initialized' and every phase - * re-executes against the preserved reprint state directory. Reprint - * makes each phase incremental — files re-sync as a delta (re-index + - * diff), the database is fully re-downloaded and re-applied (the dump - * is idempotent, so edits, inserts, and deletes all propagate). + * re-pull: Studio's stage machine resets to 'initialized' and reprint + * resets its own sub-command state via prepare_repull(). Each phase is + * incremental — files re-sync as a delta (re-index + diff), the + * database is fully re-downloaded and re-applied (the dump is + * idempotent, so edits, inserts, and deletes all propagate). */ export async function runCommand( userProvidedUrl?: string, @@ -319,10 +311,7 @@ export async function runCommand( // reprint's output; before that, anything there is user data. On a // re-pull (hasCompletedOnce) the directory holds the previous pull's // output, so the guard doesn't apply. - if ( - ! studioMetadata.hasCompletedOnce && - ! hasPullCompletedStage( studioMetadata, 'flattened' ) - ) { + if ( ! studioMetadata.hasCompletedOnce && ! hasPullCompletedStage( studioMetadata, 'pulled' ) ) { if ( ( await fsUtils.pathExists( studioMetadata.sitePath ) ) && ! 
( await fsUtils.isEmptyDir( studioMetadata.sitePath ) ) @@ -451,44 +440,19 @@ export async function runCommand( studioMetadata.secret = secret; savePullMetadata( studioMetadata ); - // On a delta re-pull, the reprint sub-command state in the shared - // state directory still reads "complete" from the previous pull, so a - // plain re-run makes files-sync refuse the run (a --filter change - // throws "Cannot change --filter … while a sync is in progress") and - // lets db-sync/db-apply skip their work. Clear that state with --abort - // so each phase re-runs as a delta. Gated on hasCompletedOnce (the - // persisted "this pull finished once" flag) rather than the local - // isRepull, because the first re-pull attempt already reset stage to - // 'initialized'; a later retry must still abort. The helper itself - // only aborts stages still pending, so a mid-pull resume is safe. - // Placed after preflight because files-sync/db-sync --abort require it. - if ( studioMetadata.hasCompletedOnce ) { - await clearCompletedSubcommandState( studioMetadata, apiUrl, secret, verbose ); - } - - if ( ! hasPullCompletedStage( studioMetadata, 'essential-files-complete' ) ) { - await downloadEssentialSiteFiles( studioMetadata, apiUrl, secret, verbose ); - } - - if ( ! hasPullCompletedStage( studioMetadata, 'flattened' ) ) { - logger.reportStart( LoggerAction.CREATE_SITE, __( 'Preparing site directory…' ) ); - await refreshFlattenedSiteDirectory( studioMetadata, verbose ); - logger.reportSuccess( __( 'Site directory prepared' ) ); - recordCompletedStage( studioMetadata, 'flattened' ); - } - - if ( ! hasPullCompletedStage( studioMetadata, 'db-downloaded' ) ) { - await downloadRemoteDatabase( studioMetadata, apiUrl, secret, verbose ); - } - + // Allocate the local port before the pull so db-apply (run inside + // the composite `pull`) can rewrite the remote site URL to the + // local one the Studio server will serve. await ensurePort( studioMetadata ); - if ( ! 
hasPullCompletedStage( studioMetadata, 'db-applied' ) ) { - await applyDownloadedDatabase( studioMetadata, secret, verbose ); - } - - if ( ! hasPullCompletedStage( studioMetadata, 'runtime-generated' ) ) { - await generateRuntimeConfiguration( studioMetadata, verbose ); + // A single `reprint pull` runs the whole pipeline in one PHP-WASM + // fork: files-pull → db-pull → db-apply → flat-docroot → + // apply-runtime. reprint owns stage ordering internally and, on a + // delta re-pull, resets its own sub-command state via + // prepare_repull() — so the former clearCompletedSubcommandState + // (--abort) wiring and per-phase stage gating are gone. + if ( ! hasPullCompletedStage( studioMetadata, 'pulled' ) ) { + await runFullPull( studioMetadata, apiUrl, secret, verbose ); } let createdSiteRecord = false; @@ -847,243 +811,61 @@ function readPullMetadata( metadataPath: string ): PullSessionMetadata | null { } /** - * Apply the downloaded SQL dump into an SQLite database that Studio - * will mount as the imported site's `wp-content/database/.ht.sqlite`. + * Run reprint's composite `pull` command — the whole site-clone + * pipeline (preflight → files-pull → db-pull → db-apply → + * flat-docroot → apply-runtime) in a single PHP-WASM fork. This + * replaces the former per-sub-command orchestration: reprint owns the + * stage ordering and, when the prior pull already completed, resets + * its own sub-command state for a delta re-pull via prepare_repull(). + * + * The SQLite target follows the same geometry db-apply used before: + * - If preflight exposed the remote `wp-content` (contentDir set), + * the database lands under `rawDirectory + contentDir`, an + * already-mounted host path that flat-docroot later symlinks into + * the flattened site. + * - Otherwise it falls back to `sitePath/wp-content`. 
* - * Mount strategy depends on whether preflight told us where the - * remote site kept its `wp-content`: - * - If it did (contentDir set), reprint writes the SQLite file under - * `rawDirectory + contentDir`, which is already a mounted host path, - * so no extra mount is needed. - * - If not, we write into `sitePath/wp-content` directly and mount - * that host path into the PHP WASM runtime so reprint can see it. + * The flattened site (`--flatten-to`) and runtime output + * (`--output-dir`) directories are mounted up front so the single + * fork can write them onto the host filesystem. `ensurePort` must + * run first so `--new-site-url` points at the local server. * - * Advances the pull stage to 'db-applied' on success; thrown errors - * propagate up to the pull orchestrator for a user-facing abort. + * Advances the pull stage to 'pulled'. */ -export async function applyDownloadedDatabase( +export async function runFullPull( metadata: PullSessionMetadata, + apiUrl: string, secret: string, verbose: boolean ): Promise< void > { - logger.reportStart( LoggerAction.IMPORT_SQL, __( 'Applying database…' ) ); const contentDir = getContentDirFromState( metadata.stateDirectory ); const sqlitePath = contentDir ? `${ metadata.rawDirectory }${ contentDir }/database/.ht.sqlite` : `${ metadata.sitePath }/wp-content/database/.ht.sqlite`; - const dbApplyMounts = contentDir - ? [] - : [ { hostPath: metadata.sitePath, vfsPath: metadata.sitePath } ]; - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - [ - 'db-apply', - getReprintApiUrlForSite( metadata.normalizedUrl ), - `--state-dir=${ metadata.stateDirectory }`, - `--fs-root=${ metadata.rawDirectory }`, - '--target-engine=sqlite', - `--target-sqlite-path=${ sqlitePath }`, - `--new-site-url=${ metadata.localUrl! 
}`, - `--secret=${ secret }`, - '--no-adaptive', - ], - ( progress ) => logger.reportProgress( progress ), - { - progressLabel: __( 'Applying database' ), - mounts: dbApplyMounts, - verboseCommands: verbose, - } - ); - logger.reportSuccess( __( 'Database applied' ) ); - recordCompletedStage( metadata, 'db-applied' ); -} -/** - * Clear leftover "complete" reprint sub-command state before a delta - * re-pull re-runs each phase. - * - * After a finished pull, files-sync / db-sync / db-apply all report - * "complete" in the shared state directory. A plain re-run then either - * throws (files-sync rejects a `--filter` change while a sync still - * looks in progress: "Cannot change --filter … while a sync is in - * progress") or silently no-ops. `--abort` resets each command without - * losing useful work: files-sync keeps the local file index (so the - * re-run is a delta, not a full re-download), db-sync deletes db.sql - * (so the dump is re-fetched), and db-apply clears its apply tracking - * without touching the target tables (the next apply re-creates them; - * the dump is idempotent). - * - * files-sync and db-sync `--abort` require a prior preflight, so this - * must run after {@link runPreflight}. Each command is aborted only while - * its stage is still pending in the current re-pull, so re-running this on - * a resumed re-pull never wipes a phase that already re-ran. - */ -async function clearCompletedSubcommandState( - metadata: PullSessionMetadata, - apiUrl: string, - secret: string, - verbose: boolean -): Promise< void > { - const stateArgs = [ - `--secret=${ secret }`, - `--state-dir=${ metadata.stateDirectory }`, - `--fs-root=${ metadata.rawDirectory }`, - ]; - // Only abort a command whose stage hasn't completed yet in this re-pull - // cycle. 
Once a phase has re-run, its output must be preserved: aborting - // files-sync would delete the freshly written skipped-files list the tail - // needs, and aborting db-sync would delete the db.sql that db-apply reads. - const aborts: Array< { stage: PullStage; args: string[] } > = [ - // files-sync --abort keeps the local index, so the re-run deltas. - { - stage: 'essential-files-complete', - args: buildFilesSyncArgs( metadata, apiUrl, secret, [ '--abort' ] ), - }, - // --sql-output=file so the abort also removes the stale db.sql. - { - stage: 'db-downloaded', - args: [ 'db-sync', apiUrl, '--abort', '--sql-output=file', ...stateArgs ], - }, - // Clears apply tracking only; target tables are re-created on apply. - { stage: 'db-applied', args: [ 'db-apply', apiUrl, '--abort', ...stateArgs ] }, - ]; - const pending = aborts.filter( ( { stage } ) => ! hasPullCompletedStage( metadata, stage ) ); - if ( pending.length === 0 ) { - return; - } - logger.reportStart( LoggerAction.ABORT_IMPORT, __( 'Preparing delta re-pull…' ) ); - for ( const { args } of pending ) { - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - args, - undefined, - { verboseCommands: verbose } - ); - } - logger.reportSuccess( __( 'Previous pull state cleared' ) ); -} - -/** - * Fetch the minimum set of files needed to produce a usable flattened - * site directory: wp-config.php, wp-includes, active plugins/themes, - * uploads. Heavier wp-content payload (unused plugins, dev caches) - * is deferred to {@link downloadSkippedFiles} so the site becomes - * runnable sooner. - * - * Advances the pull stage to 'essential-files-complete'. - */ -export async function downloadEssentialSiteFiles( - metadata: PullSessionMetadata, - apiUrl: string, - secret: string, - verbose: boolean -): Promise< void > { - // When reprint crashed mid-indexing on a previous run without - // persisting a cursor, a plain files-sync would try to resume from - // a cursor that doesn't exist. 
Clear that state via - // `files-sync --abort` so the real sync below starts fresh. - if ( shouldRestartFilesSyncIndex( metadata.stateDirectory ) ) { - logger.reportWarning( - __( - 'Restarting remote file indexing before resume because the previous run did not save a resumable cursor.' - ) - ); - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - buildFilesSyncArgs( metadata, apiUrl, secret, [ '--abort' ] ), - undefined, - { - verboseCommands: verbose, - } - ); - logger.reportSuccess( __( 'Interrupted file indexing state cleared' ) ); - } - - logger.reportStart( LoggerAction.DOWNLOAD_FILES, __( 'Downloading site files…' ) ); - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - buildFilesSyncArgs( metadata, apiUrl, secret, [ - '--filter=essential-files', - '--follow-symlinks', - ] ), - ( progress ) => logger.reportProgress( progress ), - { - progressLabel: 'Downloading files', - verboseCommands: verbose, - } - ); - logger.reportSuccess( __( 'Files downloaded' ) ); - recordCompletedStage( metadata, 'essential-files-complete' ); -} - -/** - * Stream the remote database into `stateDirectory/db.sql` via reprint's - * `db-sync` command. We download-first-then-apply (rather than piping - * straight into sqlite) so a crash during apply can resume without - * re-fetching the dump. - * - * Advances the pull stage to 'db-downloaded'. 
- */ -export async function downloadRemoteDatabase( - metadata: PullSessionMetadata, - apiUrl: string, - secret: string, - verbose: boolean -): Promise< void > { - logger.reportStart( LoggerAction.DOWNLOAD_SQL, __( 'Downloading database…' ) ); + logger.reportStart( LoggerAction.DOWNLOAD_FILES, __( 'Pulling site…' ) ); await runReprintCommandUntilComplete( metadata.stateDirectory, metadata.rawDirectory, [ - 'db-sync', + 'pull', apiUrl, `--secret=${ secret }`, - '--sql-output=file', + '--filter=essential-files', + '--target-engine=sqlite', + `--target-sqlite-path=${ sqlitePath }`, + `--new-site-url=${ metadata.localUrl! }`, + `--flatten-to=${ metadata.sitePath }`, + '--runtime=playground-cli', + '--start-runtime=none', + `--output-dir=${ metadata.runtimeDirectory }`, '--no-adaptive', `--state-dir=${ metadata.stateDirectory }`, `--fs-root=${ metadata.rawDirectory }`, ], ( progress ) => logger.reportProgress( progress ), { - progressLabel: __( 'Downloading database' ), - verboseCommands: verbose, - } - ); - logger.reportSuccess( __( 'Database downloaded' ) ); - recordCompletedStage( metadata, 'db-downloaded' ); -} - -/** - * Produce the Playground CLI start script + runtime blueprint that - * Studio uses to boot the imported site. reprint reads the flattened - * site layout and preflight output, and emits a ready-to-run runtime - * under `runtimeDirectory`. Both directories are mounted into WASM - * so the runtime output lands on the host filesystem. - * - * Advances the pull stage to 'runtime-generated'. 
- */ -export async function generateRuntimeConfiguration( - metadata: PullSessionMetadata, - verbose: boolean -): Promise< void > { - logger.reportStart( LoggerAction.URL_REWRITE, __( 'Generating runtime configuration…' ) ); - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - [ - 'apply-runtime', - '--no-adaptive', - `--state-dir=${ metadata.stateDirectory }`, - `--flat-document-root=${ metadata.sitePath }`, - `--output-dir=${ metadata.runtimeDirectory }`, - '--runtime=playground-cli', - ], - undefined, - { + progressLabel: __( 'Pulling site' ), mounts: [ { hostPath: metadata.sitePath, vfsPath: metadata.sitePath }, { hostPath: metadata.runtimeDirectory, vfsPath: metadata.runtimeDirectory }, @@ -1091,8 +873,8 @@ export async function generateRuntimeConfiguration( verboseCommands: verbose, } ); - logger.reportSuccess( __( 'Runtime configuration generated' ) ); - recordCompletedStage( metadata, 'runtime-generated' ); + logger.reportSuccess( __( 'Site pulled' ) ); + recordCompletedStage( metadata, 'pulled' ); } /** @@ -1526,36 +1308,6 @@ async function ensurePort( metadata: PullSessionMetadata ): Promise< void > { savePullMetadata( metadata ); } -/** - * Runs reprint's `flat-document-root` to produce the flattened site - * directory (symlinks + merged wp-content layout) from the raw tree. 
- */ -async function refreshFlattenedSiteDirectory( - metadata: Pick< - PullSessionMetadata, - 'stateDirectory' | 'rawDirectory' | 'sitePath' | 'runtimeBlueprintPath' | 'normalizedUrl' - >, - verbose: boolean -): Promise< void > { - await runReprintCommandUntilComplete( - metadata.stateDirectory, - metadata.rawDirectory, - [ - 'flat-document-root', - getReprintApiUrlForSite( metadata.normalizedUrl ), - '--no-adaptive', - `--state-dir=${ metadata.stateDirectory }`, - `--fs-root=${ metadata.rawDirectory }`, - `--flatten-to=${ metadata.sitePath }`, - ], - undefined, - { - mounts: [ { hostPath: metadata.sitePath, vfsPath: metadata.sitePath } ], - verboseCommands: verbose, - } - ); -} - async function findExistingSite( metadata: PullSessionMetadata ): Promise< SiteData | undefined > { const cliConfig = await readCliConfig(); return cliConfig.sites.find( diff --git a/apps/cli/commands/tests/pull-reprint.test.ts b/apps/cli/commands/tests/pull-reprint.test.ts index 6bc14d6d91..09eba3f6f5 100644 --- a/apps/cli/commands/tests/pull-reprint.test.ts +++ b/apps/cli/commands/tests/pull-reprint.test.ts @@ -9,7 +9,7 @@ import { shouldRestartFilesSyncIndex } from 'cli/lib/pull/reprint-state'; import { fetchSyncableSites } from 'cli/lib/sync-api'; import { pickSyncSite } from 'cli/lib/sync-site-picker'; import { - applyDownloadedDatabase, + runFullPull, downloadSkippedFiles, findMatchingWpComSite, getReprintApiUrlForSite, @@ -178,25 +178,26 @@ describe( 'CLI: studio pull-reprint helpers', () => { } ); } ); -describe( 'CLI: studio pull-reprint db-apply phase', () => { +describe( 'CLI: studio pull-reprint single pull phase', () => { afterEach( () => { vi.restoreAllMocks(); } ); - it( 'runs reprint db-apply against the content dir from preflight, mounts nothing extra, and advances the stage', async () => { + it( 'runs one reprint pull with sqlite under the content dir, mounts the site + runtime, and advances the stage', async () => { const technicalSiteDirectory = fs.mkdtempSync( - 
path.join( os.tmpdir(), 'studio-import-db-apply-' ) + path.join( os.tmpdir(), 'studio-import-pull-' ) ); const stateDirectory = path.join( technicalSiteDirectory, 'state' ); const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); const sitePath = path.join( technicalSiteDirectory, 'site' ); + const runtimeDirectory = path.join( technicalSiteDirectory, 'runtime' ); fs.mkdirSync( stateDirectory, { recursive: true } ); fs.mkdirSync( rawDirectory, { recursive: true } ); // Preflight reported the remote site's wp-content path at - // database.wp.paths_urls.content_dir; db-apply should target an - // sqlite file under rawDirectory + that path — no extra mount - // needed because rawDirectory is already mounted. + // database.wp.paths_urls.content_dir; the pull's db-apply stage targets + // an sqlite file under rawDirectory + that path so flat-docroot can + // symlink it into the flattened site. fs.writeFileSync( path.join( stateDirectory, '.import-state.json' ), JSON.stringify( { @@ -227,50 +228,59 @@ describe( 'CLI: studio pull-reprint db-apply phase', () => { technicalSiteDirectory, rawDirectory, stateDirectory, - runtimeDirectory: path.join( technicalSiteDirectory, 'runtime' ), - runtimeBlueprintPath: path.join( technicalSiteDirectory, 'runtime', 'blueprint.json' ), - stage: 'db-downloaded', + runtimeDirectory, + runtimeBlueprintPath: path.join( runtimeDirectory, 'blueprint.json' ), + stage: 'initialized', localUrl: 'http://localhost:8881', remoteSiteUrl: 'https://example.com', } as never; - await applyDownloadedDatabase( metadata, 'hmac-secret', false ); + await runFullPull( metadata, 'https://example.com/?reprint-api', 'hmac-secret', false ); expect( reprint ).toHaveBeenCalledTimes( 1 ); const [ passedState, passedRaw, passedArgs, , passedOptions ] = reprint.mock.calls[ 0 ]; expect( passedState ).toBe( stateDirectory ); expect( passedRaw ).toBe( rawDirectory ); expect( passedArgs ).toEqual( [ - 'db-apply', + 'pull', 'https://example.com/?reprint-api', - 
`--state-dir=${ stateDirectory }`, - `--fs-root=${ rawDirectory }`, + '--secret=hmac-secret', + '--filter=essential-files', '--target-engine=sqlite', `--target-sqlite-path=${ rawDirectory }/srv/htdocs/wp-content/database/.ht.sqlite`, '--new-site-url=http://localhost:8881', - '--secret=hmac-secret', + `--flatten-to=${ sitePath }`, + '--runtime=playground-cli', + '--start-runtime=none', + `--output-dir=${ runtimeDirectory }`, '--no-adaptive', + `--state-dir=${ stateDirectory }`, + `--fs-root=${ rawDirectory }`, + ] ); + // The flattened site and runtime output dirs are mounted up front so + // the single fork can write them to the host filesystem. + expect( passedOptions?.mounts ).toEqual( [ + { hostPath: sitePath, vfsPath: sitePath }, + { hostPath: runtimeDirectory, vfsPath: runtimeDirectory }, ] ); - // No extra mount needed — reprint can already see the sqlite target - // through the rawDirectory mount. - expect( passedOptions?.mounts ).toEqual( [] ); - // Stage is bumped + persisted so a resumed run skips db-apply. + // Stage is bumped + persisted so a resumed run skips the pull. 
const persisted = JSON.parse( fs.readFileSync( path.join( technicalSiteDirectory, 'pull.json' ), 'utf-8' ) ); - expect( persisted.stage ).toBe( 'db-applied' ); + expect( persisted.stage ).toBe( 'pulled' ); fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); } ); - it( 'mounts the flattened site path when preflight did not expose a content dir', async () => { + it( 'falls back to the flattened wp-content sqlite path when preflight exposes no content dir', async () => { const technicalSiteDirectory = fs.mkdtempSync( - path.join( os.tmpdir(), 'studio-import-db-apply-fallback-' ) + path.join( os.tmpdir(), 'studio-import-pull-fallback-' ) ); const stateDirectory = path.join( technicalSiteDirectory, 'state' ); const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); const sitePath = path.join( technicalSiteDirectory, 'site' ); + const runtimeDirectory = path.join( technicalSiteDirectory, 'runtime' ); fs.mkdirSync( stateDirectory, { recursive: true } ); fs.mkdirSync( rawDirectory, { recursive: true } ); @@ -292,33 +302,38 @@ describe( 'CLI: studio pull-reprint db-apply phase', () => { technicalSiteDirectory, rawDirectory, stateDirectory, - runtimeDirectory: path.join( technicalSiteDirectory, 'runtime' ), - runtimeBlueprintPath: path.join( technicalSiteDirectory, 'runtime', 'blueprint.json' ), - stage: 'db-downloaded', + runtimeDirectory, + runtimeBlueprintPath: path.join( runtimeDirectory, 'blueprint.json' ), + stage: 'initialized', localUrl: 'http://localhost:8881', remoteSiteUrl: 'https://example.com', } as never; - await applyDownloadedDatabase( metadata, 'hmac-secret', false ); + await runFullPull( metadata, 'https://example.com/?reprint-api', 'hmac-secret', false ); const [ , , passedArgs, , passedOptions ] = reprint.mock.calls[ 0 ]; - // Sqlite now lands under the flattened site directly, and we mount - // that host path so reprint can reach it inside PHP WASM. 
+ // With no content dir from preflight, the sqlite target falls back to + // the flattened site's wp-content. expect( passedArgs ).toContain( `--target-sqlite-path=${ sitePath }/wp-content/database/.ht.sqlite` ); - expect( passedOptions?.mounts ).toEqual( [ { hostPath: sitePath, vfsPath: sitePath } ] ); + // The site + runtime dirs are always mounted for the single fork. + expect( passedOptions?.mounts ).toEqual( [ + { hostPath: sitePath, vfsPath: sitePath }, + { hostPath: runtimeDirectory, vfsPath: runtimeDirectory }, + ] ); fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); } ); - it( 'propagates the reprint error and leaves stage at db-downloaded for a safe resume', async () => { + it( 'propagates the reprint error and leaves the stage before "pulled" for a safe resume', async () => { const technicalSiteDirectory = fs.mkdtempSync( - path.join( os.tmpdir(), 'studio-import-db-apply-fail-' ) + path.join( os.tmpdir(), 'studio-import-pull-fail-' ) ); const stateDirectory = path.join( technicalSiteDirectory, 'state' ); const rawDirectory = path.join( technicalSiteDirectory, 'raw' ); const sitePath = path.join( technicalSiteDirectory, 'site' ); + const runtimeDirectory = path.join( technicalSiteDirectory, 'runtime' ); fs.mkdirSync( stateDirectory, { recursive: true } ); fs.mkdirSync( rawDirectory, { recursive: true } ); fs.writeFileSync( @@ -339,20 +354,20 @@ describe( 'CLI: studio pull-reprint db-apply phase', () => { technicalSiteDirectory, rawDirectory, stateDirectory, - runtimeDirectory: path.join( technicalSiteDirectory, 'runtime' ), - runtimeBlueprintPath: path.join( technicalSiteDirectory, 'runtime', 'blueprint.json' ), - stage: 'db-downloaded' as const, + runtimeDirectory, + runtimeBlueprintPath: path.join( runtimeDirectory, 'blueprint.json' ), + stage: 'initialized' as const, localUrl: 'http://localhost:8881', remoteSiteUrl: 'https://example.com', }; await expect( - applyDownloadedDatabase( metadata as never, 'hmac-secret', false ) + 
runFullPull( metadata as never, 'https://example.com/?reprint-api', 'hmac-secret', false ) ).rejects.toThrow( 'reprint exited with code 1' ); - // Stage must NOT advance — otherwise a resume would skip db-apply - // even though the database never made it into sqlite. - expect( metadata.stage ).toBe( 'db-downloaded' ); + // Stage must NOT advance to 'pulled' — otherwise a resume would skip + // the pull even though the site never finished importing. + expect( metadata.stage ).toBe( 'initialized' ); expect( fs.existsSync( path.join( technicalSiteDirectory, 'pull.json' ) ) ).toBe( false ); fs.rmSync( technicalSiteDirectory, { recursive: true, force: true } ); diff --git a/apps/cli/reprint-child.ts b/apps/cli/reprint-child.ts index 7addcdbb00..cf407c7d51 100644 --- a/apps/cli/reprint-child.ts +++ b/apps/cli/reprint-child.ts @@ -167,7 +167,11 @@ async function runReprint( msg: RunMessage ) { 'openssl.cafile': '/tmp/ca-bundle.crt', 'curl.cainfo': '/tmp/ca-bundle.crt', allow_url_fopen: 1, - memory_limit: '512M', + // The composite `pull` runs the whole pipeline in one long-lived + // fork (no per-sub-command teardown to free the heap), so the + // WASM high-water-mark from the file index carries across phases. + // 1024M gives headroom over the ~510M peak seen on large sites. + memory_limit: '1024M', error_reporting: String( 32767 & ~8192 ), display_errors: 'stderr', log_errors: 0, From dc16064e2525ba44cc516c65678858cd2777dea2 Mon Sep 17 00:00:00 2001 From: Roberto Aranda Date: Thu, 18 Jun 2026 18:59:17 +0200 Subject: [PATCH 7/7] Describe current behavior in pull-reprint comments Drop the historic 'replaces the former clearCompletedSubcommandState' framing from the pull comment and the runFullPull docblock; describe what the single-pull flow does now. 
--- apps/cli/commands/pull-reprint.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/apps/cli/commands/pull-reprint.ts b/apps/cli/commands/pull-reprint.ts index f11cd69001..9f781d3c1d 100644 --- a/apps/cli/commands/pull-reprint.ts +++ b/apps/cli/commands/pull-reprint.ts @@ -447,10 +447,9 @@ export async function runCommand( // A single `reprint pull` runs the whole pipeline in one PHP-WASM // fork: files-pull → db-pull → db-apply → flat-docroot → - // apply-runtime. reprint owns stage ordering internally and, on a - // delta re-pull, resets its own sub-command state via - // prepare_repull() — so the former clearCompletedSubcommandState - // (--abort) wiring and per-phase stage gating are gone. + // apply-runtime. reprint owns the stage ordering internally and, on + // a delta re-pull, resets its own sub-command state via + // prepare_repull(). if ( ! hasPullCompletedStage( studioMetadata, 'pulled' ) ) { await runFullPull( studioMetadata, apiUrl, secret, verbose ); } @@ -811,14 +810,14 @@ function readPullMetadata( metadataPath: string ): PullSessionMetadata | null { } /** - * Run reprint's composite `pull` command — the whole site-clone + * Run reprint's composite `pull` command: the whole site-clone * pipeline (preflight → files-pull → db-pull → db-apply → - * flat-docroot → apply-runtime) in a single PHP-WASM fork. This - * replaces the former per-sub-command orchestration: reprint owns the - * stage ordering and, when the prior pull already completed, resets - * its own sub-command state for a delta re-pull via prepare_repull(). + * flat-docroot → apply-runtime) in a single PHP-WASM fork, with + * reprint owning the stage ordering and, when the prior pull already + * completed, resetting its own sub-command state for a delta re-pull + * via prepare_repull(). 
* - * The SQLite target follows the same geometry db-apply used before: + * The SQLite target geometry: * - If preflight exposed the remote `wp-content` (contentDir set), * the database lands under `rawDirectory + contentDir`, an * already-mounted host path that flat-docroot later symlinks into