diff --git a/.changeset/weak-cloths-lose.md b/.changeset/weak-cloths-lose.md new file mode 100644 index 000000000..b81bac9d0 --- /dev/null +++ b/.changeset/weak-cloths-lose.md @@ -0,0 +1,5 @@ +--- +"@headstartwp/headstartwp": patch +--- + +Fix how data-wp-block attribute is set to avoid generating incorrect/insecure markup diff --git a/docs/documentation/06-WordPress Integration/gutenberg.md b/docs/documentation/06-WordPress Integration/gutenberg.md index 699fe06f8..02ac173f5 100644 --- a/docs/documentation/06-WordPress Integration/gutenberg.md +++ b/docs/documentation/06-WordPress Integration/gutenberg.md @@ -40,14 +40,14 @@ This filter is not as useful as the previous one but it allows you to filter the /** * Filter's out the block's attributes after serialization * - * @param string $encoded_attrs The serialized block's Attributes + * @param string $encoded_attrs The block attributes serialized to a JSON string * @param array $attrs The Block's Attributes * @param array $block The Block's schema * @param \WP_Block $block_instance The block's instance */ $block_attrs_serialized = apply_filters( 'tenup_headless_wp_render_blocks_attrs_serialized', - esc_attr( wp_json_encode( $block_attrs ) ), + wp_json_encode( $block_attrs ), $block_attrs, $block, $block_instance diff --git a/wp/headless-wp/includes/classes/Integrations/Gutenberg.php b/wp/headless-wp/includes/classes/Integrations/Gutenberg.php index a4e8c8b9a..cf6f50848 100644 --- a/wp/headless-wp/includes/classes/Integrations/Gutenberg.php +++ b/wp/headless-wp/includes/classes/Integrations/Gutenberg.php @@ -304,12 +304,37 @@ public function process_block_with_dom_document_api( $html, $block_name, $block_ } } + /** + * Set the block attributes in the HTML + * + * This is a workaround to avoid the issue with the HTML_Tag_Processor API not handling JSON with HTML in attributes. + * + * @see https://github.com/10up/headstartwp/pull/921 + * + * @param string $placeholder The placeholder for the block attributes + * @param string $html The block markup + * @param string $block_attrs_serialized The block attributes serialized to a JSON string + * + * @return string The processed html + */ + public function set_block_attributes_tag_api( $placeholder, $html, $block_attrs_serialized ) { + $search = sprintf( '/data-wp-block="%s"/', preg_quote( $placeholder, '/' ) ); + $replace = sprintf( 'data-wp-block="%s"', htmlspecialchars( $block_attrs_serialized ) ); + + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped + return preg_replace( + $search, + $replace, + $html + ); + } + /** * Process the block with the WP_HTML_Tag_Processor * * @param string $html The block markup * @param string $block_name The block name - * @param string $block_attrs_serialized The serialized block attributes + * @param string $block_attrs_serialized The block attributes serialized to a JSON string * @param array $block The block schema * @param WP_Block $block_instance The block instance * @@ -321,7 +346,14 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr if ( ! $this->bypass_block_attributes( $block_name, $block_instance ) && $doc->next_tag() ) { $doc->set_attribute( 'data-wp-block-name', $block_name ); - $doc->set_attribute( 'data-wp-block', $block_attrs_serialized ); + $placeholder = '___HEADSTARTWP_BLOCK_ATTRS___'; + $doc->set_attribute( 'data-wp-block', $placeholder ); + + $intermediate_html = $doc->get_updated_html(); + $intermediate_html = $this->set_block_attributes_tag_api( $placeholder, $intermediate_html, $block_attrs_serialized ); + + $doc = new WP_HTML_Tag_Processor( $intermediate_html ); + $doc->next_tag(); /** * Filter the block before rendering @@ -347,7 +379,7 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr * * @param string $html The block markup * @param string $block_name The block name - * @param string $serialized_attributes Serialized attributes + * @param string $serialized_attributes The block attributes serialized to a JSON string * @param array $block The block array * @param WP_Block $block_instance The block instance * @@ -365,14 +397,8 @@ public function process_dom_document_block( // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase $root_node = $document->documentElement; - $attrs = $document->createAttribute( 'data-wp-block' ); - $attrs->value = $serialized_attributes; - - $block_name_obj = $document->createAttribute( 'data-wp-block-name' ); - $block_name_obj->value = $block_name; - - $root_node->appendChild( $attrs ); - $root_node->appendChild( $block_name_obj ); + $root_node->setAttribute( 'data-wp-block-name', $block_name ); + $root_node->setAttribute( 'data-wp-block', $serialized_attributes ); /** * Filter the block's DOMElement before rendering @@ -476,14 +502,14 @@ public function render_block( $html, $block, $block_instance ) { /** * Filter out the block attributes after serialization * - * @param string $encoded_attrs The serialized block attributes + * @param string $encoded_attrs The block attributes serialized to a JSON string * @param array $attrs The block attributes * @param array $block The block schema * @param WP_Block $block_instance The block instance */ $block_attrs_serialized = apply_filters( 'tenup_headless_wp_render_blocks_attrs_serialized', - esc_attr( wp_json_encode( $block_attrs ) ), + wp_json_encode( $block_attrs ), $block_attrs, $block, $block_instance diff --git a/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php b/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php index 3279bbdc5..ab39924ce 100644 --- a/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php +++ b/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php @@ -167,10 +167,10 @@ public function test_handle_multi_byte_html_encoding() { MARKUP ); $dom_expected = <<The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.

+

The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.

RESULT; $html_tag_api_expected = <<The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.

+

The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.

RESULT; $dom_output = $this->parser->render_block( $html, $block, $instance ); @@ -309,6 +309,212 @@ public function test_render_html_tag_api( array $incoming, array $block_structur remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); } + /** + * Tests that HTML entities in block attributes are preserved correctly with tag processor + * + * @return void + */ + public function test_html_entities_are_double_encoded() { + // Test with content containing HTML entities + // (and a ' to ensure that it is not serialized as a single-quote string + // by WP_HTML_Tag_Processor) + $markup = 'content'; + $block = $this->core_render_block_from_markup( $markup ); + $enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] ); + + // Any HTML entities in JSON strings should be double-encoded + $this->assertStringContainsString( + 'data-wp-block="{"content":"&lt;script&gt;alert(&#039;xss&#039;)&lt;\/script&gt;', + $enhanced_block + ); + } + + /** + * Tests that HTML entities in block attributes are preserved correctly with tag processor + * + * @return void + */ + public function test_html_entities_are_double_encoded_using_WP_HTML_Tag_Processor() { + add_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); + $this->test_html_entities_are_double_encoded(); + remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); + } + + /** + * Data provider for block roundtrip tests + * + * @return array + */ + public function block_roundtrip_data_provider() { + $test_cases = [ + 'block value containing no special characters' => [ + 'core/heading', + [ + 'x' => 'hi', + 'level' => 2, + ], + '

', + ], + 'block value containing named character reference '' => [ + 'core/heading', + [ + 'x' => ''', + 'level' => 2, + ], + '

', + ], + 'block value containing lone apostrophe \' (from ENT_HTML5)' => [ + 'core/heading', + [ + 'x' => '\'', + 'level' => 2, + ], + '

', + ], + 'block value containing lone quote " (from ENT_COMPAT)' => [ + 'core/heading', + [ + 'x' => '"', + 'level' => 2, + ], + '

', + ], + 'block value containing named character reference "' => [ + 'core/heading', + [ + 'x' => '"', + 'level' => 2, + ], + '

', + ], + 'block value containing lone ampersand &' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'block value containing named character reference &' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'block value containing hexadecimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'block value containing leading zero hexadecimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'block value containing decimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'block value containing leading zero decimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + '

', + ], + 'html_entities' => [ + 'core/heading', + [ + 'content' => '<script>alert('xss')</script>', + 'level' => 2, + ], + '

<script>alert(\'xss\')</script>

', + ], + 'complex_attributes' => [ + 'core/image', + [ + 'id' => 28, + 'sizeSlug' => 'large', + 'linkDestination' => 'none', + 'alt' => '', + ], + '
', + ], + 'special_characters' => [ + 'core/quote', + [ + 'citation' => 'Author "Name" & Co.', + 'value' => '

Quote with "quotes" & ampersands

', + ], + '

Quote with "quotes" & ampersands

Author "Name" & Co.
', + ], + ]; + $test_cases_with_or_without_tag_processor = []; + foreach ( $test_cases as $name => $case ) { + $test_cases_with_or_without_tag_processor[ "$name with WP_HTML_Tag_Processor" ] = array_merge( $case, [ true ] ); + $test_cases_with_or_without_tag_processor[ "$name with DomDocument" ] = array_merge( $case, [ false ] ); + } + return $test_cases_with_or_without_tag_processor; + } + + /** + * Tests that block attributes can be round-tripped correctly + * + * @dataProvider block_roundtrip_data_provider + * + * @param string $expected_block_name The expected block name + * @param array $expected_attributes The expected block attributes + * @param string $markup The block markup to test + * @param bool $use_tag_processor Whether to use the tag processor + * @return void + */ + public function test_block_attributes_roundtrip( $expected_block_name, $expected_attributes, $markup, $use_tag_processor ) { + $block = $this->core_render_block_from_markup( $markup ); + $tag_processor_function = $use_tag_processor ? '__return_true' : '__return_false'; + add_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function ); + try { + $enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] ); + } finally { + remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function ); + } + + // Parse the enhanced block using DOMDocument to extract data-wp-block and data-wp-block-name + $doc = new \DOMDocument(); + $success = $doc->loadHTML( $enhanced_block, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); + + $this->assertTrue( $success, 'DOMDocument should successfully parse the enhanced block HTML' ); + + // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + $root_element = $doc->documentElement; + $this->assertNotNull( $root_element, 'Should have a root element' ); + + $block_name_attr = $root_element->getAttribute( 'data-wp-block-name' ); + $block_data_attr = $root_element->getAttribute( 'data-wp-block' ); + + $this->assertNotEmpty( $block_name_attr, 'data-wp-block-name attribute should be present' ); + $this->assertNotEmpty( $block_data_attr, 'data-wp-block attribute should be present' ); + + // Parse JSON - DOMDocument should have already handled HTML entity decoding + $parsed_attributes = json_decode( $block_data_attr, true ); + + $this->assertIsArray( $parsed_attributes, 'Block data should decode to valid JSON array' ); + $this->assertEquals( $expected_block_name, $block_name_attr, 'Block name should match expected' ); + $this->assertEquals( $expected_attributes, $parsed_attributes, 'Block attributes should match expected (encoded: ' . $enhanced_block . ')' ); + } + /** * Tests block's rendering Synced Patterns which use another post to store the patterns content * - Run separate to hook the Parser filter on all render_block processing, required for nested blocks