diff --git a/src/class-ss-util.php b/src/class-ss-util.php
index 506c5746..3bdbad49 100644
--- a/src/class-ss-util.php
+++ b/src/class-ss-util.php
@@ -611,6 +611,135 @@ public static function get_static_site_url() {
 		return $target_url;
 	}
 
+	/**
+	 * Get the destination base used when rewriting plain-text file URLs.
+	 *
+	 * This mirrors how absolute/relative/offline URLs are written elsewhere in
+	 * the exporter while allowing an empty string for root-relative exports.
+	 *
+	 * @return string|null Destination base without a trailing slash, or null when
+	 *                     an absolute export has no destination URL configured.
+	 */
+	public static function get_text_file_destination_base() {
+		$options = Options::instance();
+
+		switch ( $options->get( 'destination_url_type' ) ) {
+			case 'absolute':
+				$destination_url = untrailingslashit( (string) $options->get_destination_url() );
+				return $destination_url === '' ? null : $destination_url;
+			case 'relative':
+				$relative_path = (string) $options->get( 'relative_path' );
+				return $relative_path === '' ? '' : untrailingslashit( $relative_path );
+			default:
+				return untrailingslashit( (string) $options->get_destination_url() );
+		}
+	}
+
+	/**
+	 * Convert a sitemap reference found in robots.txt to a fully qualified URL.
+	 *
+	 * Search engines expect the robots.txt Sitemap directive to point to a
+	 * complete URL. When the destination URL type is 'absolute', local sitemap
+	 * URLs and paths are rebuilt on top of the static site URL; for every other
+	 * destination URL type the reference is returned unchanged.
+	 *
+	 * NOTE(review): non-absolute exports therefore keep the reference exactly as
+	 * found in the file. Confirm this is intended.
+	 *
+	 * @param string $url Sitemap URL or path.
+	 *
+	 * @return string
+	 */
+	public static function convert_text_file_sitemap_url( $url ) {
+		if ( ! is_string( $url ) || $url === '' ) {
+			return $url;
+		}
+
+		$options = Options::instance();
+		if ( 'absolute' !== $options->get( 'destination_url_type' ) ) {
+			return $url;
+		}
+
+		$static_site_url = untrailingslashit( self::get_static_site_url() );
+		if ( $static_site_url === '' ) {
+			return $url;
+		}
+
+		$absolute_url = self::relative_to_absolute_url( $url, trailingslashit( self::origin_url() ) . 'robots.txt' );
+		if ( ! is_string( $absolute_url ) || $absolute_url === '' || ! self::is_local_url( $absolute_url ) ) {
+			return $url;
+		}
+
+		$sanitized_path = self::sanitize_local_path( self::get_path_from_local_url( $absolute_url ) );
+
+		return $static_site_url . $sanitized_path;
+	}
+
+	/**
+	 * Replace local origin URLs in exported plain-text files such as robots.txt.
+	 *
+	 * Sitemap directives are swapped for placeholders first so the generic origin
+	 * replacement cannot rewrite them; they are restored at the end with the value
+	 * returned by convert_text_file_sitemap_url().
+	 *
+	 * @param string $content File content to update.
+	 *
+	 * @return string
+	 */
+	public static function replace_origin_urls_in_text( $content ) {
+		if ( ! is_string( $content ) || $content === '' ) {
+			return $content;
+		}
+
+		$sitemap_placeholders = array();
+		$protected = preg_replace_callback(
+			'/(^\s*Sitemap:\s*)(\S+)/im',
+			function ( $matches ) use ( &$sitemap_placeholders ) {
+				$placeholder = '__SIMPLY_STATIC_SITEMAP_' . count( $sitemap_placeholders ) . '__';
+				$sitemap_placeholders[ $placeholder ] = $matches[1] . self::convert_text_file_sitemap_url( $matches[2] );
+				return $placeholder;
+			},
+			$content
+		);
+
+		// preg_replace_callback() returns null on a PCRE error. Keep the original
+		// content in that case; passing null on would make this method return ''.
+		if ( is_string( $protected ) ) {
+			$content = $protected;
+		} else {
+			$sitemap_placeholders = array();
+		}
+
+		$destination_base = self::get_text_file_destination_base();
+		if ( null === $destination_base ) {
+			return empty( $sitemap_placeholders ) ? $content : strtr( $content, $sitemap_placeholders );
+		}
+
+		$origin_base = trailingslashit( self::origin_url() );
+		$origin_http = set_url_scheme( $origin_base, 'http' );
+		$origin_https = set_url_scheme( $origin_base, 'https' );
+		$origin_proto = preg_replace( '#^https?:#i', '', $origin_https );
+		$search = [
+			untrailingslashit( rtrim( $origin_http, '/' ) ),
+			untrailingslashit( rtrim( $origin_https, '/' ) ),
+			untrailingslashit( rtrim( $origin_proto, '/' ) ),
+		];
+		$content = str_replace( $search, $destination_base, $content );
+
+		$origin_host = self::origin_host();
+		$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
+		// An empty host would reduce the pattern to a bare "//" and rewrite every URL.
+		if ( is_string( $host_no_port ) && $host_no_port !== '' ) {
+			$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
+			$replaced = preg_replace( $pattern, $destination_base, $content );
+			if ( is_string( $replaced ) ) {
+				$content = $replaced;
+			}
+		}
+
+		return empty( $sitemap_placeholders ) ? $content : strtr( $content, $sitemap_placeholders );
+	}
+
 	/**
 	 * Get the protocol used for the origin URL
 	 * @return string http or https
diff --git a/src/handlers/class-ss-rule-file-handler.php b/src/handlers/class-ss-rule-file-handler.php
index 7ab9a9ad..efd9b286 100644
--- a/src/handlers/class-ss-rule-file-handler.php
+++ b/src/handlers/class-ss-rule-file-handler.php
@@ -69,10 +69,18 @@ public static function transfer_rule_files( $destination_dir, $archive_dir ) : v
 				wp_mkdir_p( $dest_dir );
 			}
 
-			if ( ! @copy( $source, $dest ) ) {
-				Util::debug_log( '[Rule File] Failed to copy ' . $source . ' to ' . $dest );
+			$contents = @file_get_contents( $source );
+			if ( false === $contents ) {
+				Util::debug_log( '[Rule File] Failed to read ' . $source );
+				continue;
+			}
+
+			$processed = Util::replace_origin_urls_in_text( $contents );
+
+			if ( false === @file_put_contents( $dest, $processed ) ) {
+				Util::debug_log( '[Rule File] Failed to write ' . $source . ' to ' . $dest );
 			} else {
-				Util::debug_log( '[Rule File] Copied: ' . $dest );
+				Util::debug_log( '[Rule File] Wrote processed file: ' . $dest );
 			}
 		}
 	}
diff --git a/src/handlers/class-ss-text-file-handler.php b/src/handlers/class-ss-text-file-handler.php
index a177216c..2b1b0864 100644
--- a/src/handlers/class-ss-text-file-handler.php
+++ b/src/handlers/class-ss-text-file-handler.php
@@ -38,27 +38,10 @@ public function after_file_fetch( $destination_dir ) {
 			return;
 		}
 
-		$options = Options::instance();
-		$destination_url = rtrim( $options->get_destination_url(), '/' );
+		$replaced = Util::replace_origin_urls_in_text( $contents );
 
-		// First pass: regex on host (with optional port) to handle generic cases.
-		$origin_host = Util::origin_host();
-		$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
-		$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
-		$replaced = preg_replace( $pattern, $destination_url, $contents );
-
-		// Second pass fallback: replace exact origin home URL prefixes (http, https, protocol-relative),
-		// including potential subdirectory installs, in case the first regex didn't match due to path/port differences.
-		$home_http = set_url_scheme( home_url( '/' ), 'http' );
-		$home_https = set_url_scheme( home_url( '/' ), 'https' );
-		$home_proto = preg_replace( '#^https?:#i', '', $home_https ); // //example.com/...
-
-		$search = [ untrailingslashit( rtrim( $home_http, '/' ) ), untrailingslashit( rtrim( $home_https, '/' ) ), untrailingslashit( rtrim( $home_proto, '/' ) ) ];
-		$replace = array_fill( 0, count( $search ), untrailingslashit( rtrim( $destination_url, '/' ) ) );
-		$replaced2 = str_replace( $search, untrailingslashit( rtrim( $destination_url, '/' ) ), $replaced );
-
-		if ( $replaced2 !== $contents ) {
-			@file_put_contents( $full_path, $replaced2 );
+		if ( $replaced !== $contents ) {
+			@file_put_contents( $full_path, $replaced );
 			Util::debug_log( '[Text_File_Handler] Replaced URLs in ' . $basename . ' at ' . $full_path );
 			return;
 		}
diff --git a/src/integrations/class-ss-aio-seo-integration.php b/src/integrations/class-ss-aio-seo-integration.php
index 067af9b3..f6a6a2b5 100644
--- a/src/integrations/class-ss-aio-seo-integration.php
+++ b/src/integrations/class-ss-aio-seo-integration.php
@@ -300,25 +300,7 @@ private function write_archive_file( $filename, $content ) {
 	 * @return string
 	 */
 	private function replace_urls_in_text( $content ) {
-		if ( ! is_string( $content ) || $content === '' ) {
-			return $content;
-		}
-		$options = Options::instance();
-		$destination_url = rtrim( $options->get_destination_url(), '/' );
-		if ( empty( $destination_url ) ) {
-			return $content;
-		}
-		$origin_host = Util::origin_host();
-		$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
-		$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
-		$replaced = preg_replace( $pattern, $destination_url, $content );
-
-		$home_http = set_url_scheme( home_url( '/' ), 'http' );
-		$home_https = set_url_scheme( home_url( '/' ), 'https' );
-		$home_proto = preg_replace( '#^https?:#i', '', $home_https );
-		$search = [ untrailingslashit( rtrim( $home_http, '/' ) ), untrailingslashit( rtrim( $home_https, '/' ) ), untrailingslashit( rtrim( $home_proto, '/' ) ) ];
-		$replaced2 = str_replace( $search, untrailingslashit( rtrim( $destination_url, '/' ) ), $replaced );
-		return $replaced2;
+		return Util::replace_origin_urls_in_text( $content );
 	}
 
 	/**
diff --git a/src/integrations/class-ss-rank-math-integration.php b/src/integrations/class-ss-rank-math-integration.php
index 50895ec8..25b7411a 100644
--- a/src/integrations/class-ss-rank-math-integration.php
+++ b/src/integrations/class-ss-rank-math-integration.php
@@ -140,36 +140,7 @@ private function write_archive_file( $filename, $content ) {
 	 * @return string
 	 */
 	private function replace_urls_in_text( $content ) {
-		if ( ! is_string( $content ) ) {
-			return $content;
-		}
-		if ( $content === '' ) {
-			return $content;
-		}
-
-		$options = Options::instance();
-		$destination_url = rtrim( $options->get_destination_url(), '/' );
-		if ( empty( $destination_url ) ) {
-			return $content;
-		}
-
-		// First pass: regex on host (with optional port) to handle generic cases.
-		$origin_host = Util::origin_host();
-		$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
-		$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
-		$replaced = preg_replace( $pattern, $destination_url, $content );
-
-		// Second pass fallback: replace exact origin home URL prefixes (http, https, protocol-relative),
-		// including potential subdirectory installs.
-		$home_http = set_url_scheme( home_url( '/' ), 'http' );
-		$home_https = set_url_scheme( home_url( '/' ), 'https' );
-		$home_proto = preg_replace( '#^https?:#i', '', $home_https ); // //example.com/...
-		$search = [ untrailingslashit( rtrim( $home_http, '/' ) ), untrailingslashit( rtrim( $home_https, '/' ) ), untrailingslashit( rtrim( $home_proto, '/' ) ) ];
-		$replaced2 = str_replace( $search, untrailingslashit( rtrim( $destination_url, '/' ) ), $replaced );
-
-		// No logging here; just return the updated content if any replacements were made.
-
-		return $replaced2;
+		return Util::replace_origin_urls_in_text( $content );
 	}
 
 	/**
diff --git a/src/integrations/class-ss-yoast-integration.php b/src/integrations/class-ss-yoast-integration.php
index 30e42820..62805a81 100644
--- a/src/integrations/class-ss-yoast-integration.php
+++ b/src/integrations/class-ss-yoast-integration.php
@@ -375,25 +375,7 @@ private function write_archive_file( $filename, $content ) {
 	 * @return string
 	 */
 	private function replace_urls_in_text( $content ) {
-		if ( ! is_string( $content ) || $content === '' ) {
-			return $content;
-		}
-		$options = Options::instance();
-		$destination_url = rtrim( $options->get_destination_url(), '/' );
-		if ( empty( $destination_url ) ) {
-			return $content;
-		}
-		$origin_host = Util::origin_host();
-		$host_no_port = preg_replace( '/:\\d+$/', '', (string) $origin_host );
-		$pattern = '/(?:https?:)?\\/\\/' . preg_quote( $host_no_port, '/' ) . '(?::\\d+)?/i';
-		$replaced = preg_replace( $pattern, $destination_url, $content );
-
-		$home_http = set_url_scheme( home_url( '/' ), 'http' );
-		$home_https = set_url_scheme( home_url( '/' ), 'https' );
-		$home_proto = preg_replace( '#^https?:#i', '', $home_https );
-		$search = [ untrailingslashit( rtrim( $home_http, '/' ) ), untrailingslashit( rtrim( $home_https, '/' ) ), untrailingslashit( rtrim( $home_proto, '/' ) ) ];
-		$replaced2 = str_replace( $search, untrailingslashit( rtrim( $destination_url, '/' ) ), $replaced );
-		return $replaced2;
+		return Util::replace_origin_urls_in_text( $content );
 	}
 
 	/**