Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions src/class-ss-util.php
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,115 @@ public static function get_static_site_url() {
return $target_url;
}

/**
 * Resolve the base that replaces the origin URL inside plain-text exports.
 *
 * The result depends on the configured `destination_url_type` option:
 * - absolute: the destination URL without a trailing slash, or null when
 *   that URL is empty.
 * - relative: the `relative_path` option without a trailing slash; an empty
 *   path yields an empty string.
 * - anything else: the destination URL without a trailing slash, which may
 *   be an empty string.
 *
 * @return string|null Replacement base, or null when an absolute export has no destination URL.
 */
public static function get_text_file_destination_base() {
	$settings = Options::instance();
	$url_type = $settings->get( 'destination_url_type' );

	// Loose comparisons on purpose: they keep the matching rules of a switch statement.
	$is_absolute = ( $url_type == 'absolute' );

	if ( ! $is_absolute && $url_type == 'relative' ) {
		$path = (string) $settings->get( 'relative_path' );

		if ( '' === $path ) {
			return '';
		}

		return untrailingslashit( $path );
	}

	$base = untrailingslashit( (string) $settings->get_destination_url() );

	// Only absolute exports signal "nothing to rewrite with" via null.
	if ( $is_absolute && '' === $base ) {
		return null;
	}

	return $base;
}

/**
 * Rewrite a robots.txt Sitemap reference so it points at the static site.
 *
 * The rewrite only happens when `destination_url_type` is 'absolute', the
 * static site URL is non-empty, and the reference, resolved against the
 * origin's robots.txt location, turns out to be a local URL. In every other
 * case the value is handed back untouched.
 *
 * @param string $url Sitemap URL or path.
 *
 * @return string Sitemap URL on the static site, or the value that was passed in.
 */
public static function convert_text_file_sitemap_url( $url ) {
	$has_reference = is_string( $url ) && '' !== $url;
	if ( ! $has_reference ) {
		return $url;
	}

	if ( 'absolute' !== Options::instance()->get( 'destination_url_type' ) ) {
		return $url;
	}

	$site_base = untrailingslashit( self::get_static_site_url() );
	if ( '' === $site_base ) {
		return $url;
	}

	// Resolve the reference the way a crawler reading robots.txt at the origin root would.
	$robots_location = trailingslashit( self::origin_url() ) . 'robots.txt';
	$resolved        = self::relative_to_absolute_url( $url, $robots_location );

	$is_local_target = is_string( $resolved ) && '' !== $resolved && self::is_local_url( $resolved );
	if ( ! $is_local_target ) {
		return $url;
	}

	$local_path = self::get_path_from_local_url( $resolved );

	return $site_base . self::sanitize_local_path( $local_path );
}

/**
 * Replace local origin URLs in exported plain-text files such as robots.txt.
 *
 * Sitemap directives are handled separately from the rest of the text: each
 * one is converted with convert_text_file_sitemap_url() and parked behind a
 * placeholder so the generic origin replacement below cannot touch it. The
 * placeholders are restored just before returning.
 *
 * When get_text_file_destination_base() returns null, only the Sitemap
 * directives are processed and the remaining content is left alone.
 *
 * @param string $content File content to update.
 *
 * @return string Updated content; non-string or empty input is returned unchanged.
 */
public static function replace_origin_urls_in_text( $content ) {
	if ( ! is_string( $content ) || $content === '' ) {
		return $content;
	}

	// Horizontal whitespace only: the directive and its URL must share a line,
	// otherwise an empty "Sitemap:" line would capture the next line's first token.
	$sitemap_placeholders = array();
	$shielded             = preg_replace_callback(
		'/(^[ \t]*Sitemap:[ \t]*)(\S+)/im',
		function ( $matches ) use ( &$sitemap_placeholders ) {
			$placeholder                          = '__SIMPLY_STATIC_SITEMAP_' . count( $sitemap_placeholders ) . '__';
			$sitemap_placeholders[ $placeholder ] = $matches[1] . self::convert_text_file_sitemap_url( $matches[2] );
			return $placeholder;
		},
		$content
	);

	if ( is_string( $shielded ) ) {
		$content = $shielded;
	} else {
		// preg_replace_callback() returns null on a PCRE error. Keep the original
		// text instead of letting callers write out an empty file, and drop any
		// placeholders collected so far since none of them made it into $content.
		$sitemap_placeholders = array();
	}

	$destination_base = self::get_text_file_destination_base();

	if ( null !== $destination_base ) {
		// Pass 1: exact origin URL prefixes (http, https, protocol-relative).
		// NOTE(review): str_replace() applies the searches one after another, so a
		// destination base that itself contains the origin host can be matched
		// again by a later search or by pass 2 - confirm this setup is not expected.
		$origin_base  = trailingslashit( self::origin_url() );
		$origin_http  = set_url_scheme( $origin_base, 'http' );
		$origin_https = set_url_scheme( $origin_base, 'https' );
		$origin_proto = preg_replace( '#^https?:#i', '', $origin_https );
		$search       = array(
			untrailingslashit( $origin_http ),
			untrailingslashit( $origin_https ),
			untrailingslashit( $origin_proto ),
		);
		$content      = str_replace( $search, $destination_base, $content );

		// Pass 2: any scheme/port variation of the origin host.
		$host_no_port = preg_replace( '/:\\d+$/', '', (string) self::origin_host() );

		// Without a host the pattern would match every "//" in the file.
		if ( is_string( $host_no_port ) && '' !== $host_no_port ) {
			$pattern  = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
			$replaced = preg_replace( $pattern, $destination_base, $content );
			if ( is_string( $replaced ) ) {
				$content = $replaced;
			}
		}
	}

	// strtr() is skipped for an empty map because older PHP versions return false for it.
	return empty( $sitemap_placeholders ) ? $content : strtr( $content, $sitemap_placeholders );
}

/**
* Get the protocol used for the origin URL
* @return string http or https
Expand Down
14 changes: 11 additions & 3 deletions src/handlers/class-ss-rule-file-handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,18 @@ public static function transfer_rule_files( $destination_dir, $archive_dir ) : v
wp_mkdir_p( $dest_dir );
}

if ( ! @copy( $source, $dest ) ) {
Util::debug_log( '[Rule File] Failed to copy ' . $source . ' to ' . $dest );
$contents = @file_get_contents( $source );
if ( false === $contents ) {
Util::debug_log( '[Rule File] Failed to read ' . $source );
continue;
}

$processed = Util::replace_origin_urls_in_text( $contents );

if ( false === @file_put_contents( $dest, $processed ) ) {
Util::debug_log( '[Rule File] Failed to write ' . $source . ' to ' . $dest );
} else {
Util::debug_log( '[Rule File] Copied: ' . $dest );
Util::debug_log( '[Rule File] Wrote processed file: ' . $dest );
}
}
}
Expand Down
23 changes: 3 additions & 20 deletions src/handlers/class-ss-text-file-handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,10 @@ public function after_file_fetch( $destination_dir ) {
return;
}

$options = Options::instance();
$destination_url = rtrim( $options->get_destination_url(), '/' );
$replaced = Util::replace_origin_urls_in_text( $contents );

// First pass: regex on host (with optional port) to handle generic cases.
$origin_host = Util::origin_host();
$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
$replaced = preg_replace( $pattern, $destination_url, $contents );

// Second pass fallback: replace exact origin home URL prefixes (http, https, protocol-relative),
// including potential subdirectory installs, in case the first regex didn't match due to path/port differences.
$home_http = set_url_scheme( home_url( '/' ), 'http' );
$home_https = set_url_scheme( home_url( '/' ), 'https' );
$home_proto = preg_replace( '#^https?:#i', '', $home_https ); // //example.com/...

$search = [ untrailingslashit( rtrim( $home_http, '/' ) ), untrailingslashit( rtrim( $home_https, '/' ) ), untrailingslashit( rtrim( $home_proto, '/' ) ) ];
$replace = array_fill( 0, count( $search ), untrailingslashit( rtrim( $destination_url, '/' ) ) );
$replaced2 = str_replace( $search, untrailingslashit( rtrim( $destination_url, '/' ) ), $replaced );

if ( $replaced2 !== $contents ) {
@file_put_contents( $full_path, $replaced2 );
if ( $replaced !== $contents ) {
@file_put_contents( $full_path, $replaced );
Util::debug_log( '[Text_File_Handler] Replaced URLs in ' . $basename . ' at ' . $full_path );
return;
}
Expand Down
20 changes: 1 addition & 19 deletions src/integrations/class-ss-aio-seo-integration.php
Original file line number Diff line number Diff line change
Expand Up @@ -300,25 +300,7 @@ private function write_archive_file( $filename, $content ) {
* @return string
*/
private function replace_urls_in_text( $content ) {
	// Delegate to the shared helper so this integration rewrites origin URLs
	// (including robots.txt Sitemap directives) exactly like the other
	// plain-text exporters instead of carrying its own copy of the logic.
	return Util::replace_origin_urls_in_text( $content );
}

/**
Expand Down
31 changes: 1 addition & 30 deletions src/integrations/class-ss-rank-math-integration.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,36 +140,7 @@ private function write_archive_file( $filename, $content ) {
* @return string
*/
private function replace_urls_in_text( $content ) {
	// Delegate to the shared helper so this integration rewrites origin URLs
	// (including robots.txt Sitemap directives) exactly like the other
	// plain-text exporters instead of carrying its own copy of the logic.
	return Util::replace_origin_urls_in_text( $content );
}

/**
Expand Down
20 changes: 1 addition & 19 deletions src/integrations/class-ss-yoast-integration.php
Original file line number Diff line number Diff line change
Expand Up @@ -375,25 +375,7 @@ private function write_archive_file( $filename, $content ) {
* @return string
*/
private function replace_urls_in_text( $content ) {
	// Delegate to the shared helper so this integration rewrites origin URLs
	// (including robots.txt Sitemap directives) exactly like the other
	// plain-text exporters instead of carrying its own copy of the logic.
	return Util::replace_origin_urls_in_text( $content );
}

/**
Expand Down