diff --git a/.changeset/weak-cloths-lose.md b/.changeset/weak-cloths-lose.md
new file mode 100644
index 000000000..b81bac9d0
--- /dev/null
+++ b/.changeset/weak-cloths-lose.md
@@ -0,0 +1,5 @@
+---
+"@headstartwp/headstartwp": patch
+---
+
+Fix how data-wp-block attribute is set to avoid generating incorrect/insecure markup
diff --git a/docs/documentation/06-WordPress Integration/gutenberg.md b/docs/documentation/06-WordPress Integration/gutenberg.md
index 699fe06f8..02ac173f5 100644
--- a/docs/documentation/06-WordPress Integration/gutenberg.md
+++ b/docs/documentation/06-WordPress Integration/gutenberg.md
@@ -40,14 +40,14 @@ This filter is not as useful as the previous one but it allows you to filter the
/**
* Filter's out the block's attributes after serialization
*
- * @param string $encoded_attrs The serialized block's Attributes
+ * @param string $encoded_attrs The block attributes serialized to a JSON string
* @param array $attrs The Block's Attributes
* @param array $block The Block's schema
* @param \WP_Block $block_instance The block's instance
*/
$block_attrs_serialized = apply_filters(
'tenup_headless_wp_render_blocks_attrs_serialized',
- esc_attr( wp_json_encode( $block_attrs ) ),
+ wp_json_encode( $block_attrs ),
$block_attrs,
$block,
$block_instance
diff --git a/wp/headless-wp/includes/classes/Integrations/Gutenberg.php b/wp/headless-wp/includes/classes/Integrations/Gutenberg.php
index a4e8c8b9a..cf6f50848 100644
--- a/wp/headless-wp/includes/classes/Integrations/Gutenberg.php
+++ b/wp/headless-wp/includes/classes/Integrations/Gutenberg.php
@@ -304,12 +304,37 @@ public function process_block_with_dom_document_api( $html, $block_name, $block_
}
}
+ /**
+ * Set the block attributes in the HTML
+ *
+ * This is a workaround to avoid the issue with the HTML_Tag_Processor API not handling JSON with HTML in attributes.
+ *
+ * @see https://github.com/10up/headstartwp/pull/921
+ *
+ * @param string $placeholder The placeholder for the block attributes
+ * @param string $html The block markup
+ * @param string $block_attrs_serialized The block attributes serialized to a JSON string
+ *
+ * @return string The processed html
+ */
+ public function set_block_attributes_tag_api( $placeholder, $html, $block_attrs_serialized ) {
+ $search = sprintf( '/data-wp-block="%s"/', preg_quote( $placeholder, '/' ) );
+ $replace = sprintf( 'data-wp-block="%s"', htmlspecialchars( $block_attrs_serialized ) );
+
+ // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
+ return preg_replace(
+ $search,
+ $replace,
+ $html
+ );
+ }
+
/**
* Process the block with the WP_HTML_Tag_Processor
*
* @param string $html The block markup
* @param string $block_name The block name
- * @param string $block_attrs_serialized The serialized block attributes
+ * @param string $block_attrs_serialized The block attributes serialized to a JSON string
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*
@@ -321,7 +346,14 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr
if ( ! $this->bypass_block_attributes( $block_name, $block_instance ) && $doc->next_tag() ) {
$doc->set_attribute( 'data-wp-block-name', $block_name );
- $doc->set_attribute( 'data-wp-block', $block_attrs_serialized );
+ $placeholder = '___HEADSTARTWP_BLOCK_ATTRS___';
+ $doc->set_attribute( 'data-wp-block', $placeholder );
+
+ $intermediate_html = $doc->get_updated_html();
+ $intermediate_html = $this->set_block_attributes_tag_api( $placeholder, $intermediate_html, $block_attrs_serialized );
+
+ $doc = new WP_HTML_Tag_Processor( $intermediate_html );
+ $doc->next_tag();
/**
* Filter the block before rendering
@@ -347,7 +379,7 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr
*
* @param string $html The block markup
* @param string $block_name The block name
- * @param string $serialized_attributes Serialized attributes
+ * @param string $serialized_attributes The block attributes serialized to a JSON string
* @param array $block The block array
* @param WP_Block $block_instance The block instance
*
@@ -365,14 +397,8 @@ public function process_dom_document_block(
// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
$root_node = $document->documentElement;
- $attrs = $document->createAttribute( 'data-wp-block' );
- $attrs->value = $serialized_attributes;
-
- $block_name_obj = $document->createAttribute( 'data-wp-block-name' );
- $block_name_obj->value = $block_name;
-
- $root_node->appendChild( $attrs );
- $root_node->appendChild( $block_name_obj );
+ $root_node->setAttribute( 'data-wp-block-name', $block_name );
+ $root_node->setAttribute( 'data-wp-block', $serialized_attributes );
/**
* Filter the block's DOMElement before rendering
@@ -476,14 +502,14 @@ public function render_block( $html, $block, $block_instance ) {
/**
* Filter out the block attributes after serialization
*
- * @param string $encoded_attrs The serialized block attributes
+ * @param string $encoded_attrs The block attributes serialized to a JSON string
* @param array $attrs The block attributes
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*/
$block_attrs_serialized = apply_filters(
'tenup_headless_wp_render_blocks_attrs_serialized',
- esc_attr( wp_json_encode( $block_attrs ) ),
+ wp_json_encode( $block_attrs ),
$block_attrs,
$block,
$block_instance
diff --git a/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php b/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php
index 3279bbdc5..ab39924ce 100644
--- a/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php
+++ b/wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php
@@ -167,10 +167,10 @@ public function test_handle_multi_byte_html_encoding() {
MARKUP
);
$dom_expected = <<
The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.
RESULT; $html_tag_api_expected = <<The temperature is 23°C ☀️ (sun emoji) and © (copyright symbol). HTML entity for Degrees: °.
RESULT; $dom_output = $this->parser->render_block( $html, $block, $instance ); @@ -309,6 +309,212 @@ public function test_render_html_tag_api( array $incoming, array $block_structur remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); } + /** + * Tests that HTML entities in block attributes are preserved correctly with tag processor + * + * @return void + */ + public function test_html_entities_are_double_encoded() { + // Test with content containing HTML entities + // (and a ' to ensure that it is not serialized as a single-quote string + // by WP_HTML_Tag_Processor) + $markup = 'content'; + $block = $this->core_render_block_from_markup( $markup ); + $enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] ); + + // Any HTML entities in JSON strings should be double-encoded + $this->assertStringContainsString( + 'data-wp-block="{"content":"<script>alert('xss')<\/script>', + $enhanced_block + ); + } + + /** + * Tests that HTML entities in block attributes are preserved correctly with tag processor + * + * @return void + */ + public function test_html_entities_are_double_encoded_using_WP_HTML_Tag_Processor() { + add_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); + $this->test_html_entities_are_double_encoded(); + remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' ); + } + + /** + * Data provider for block roundtrip tests + * + * @return array + */ + public function block_roundtrip_data_provider() { + $test_cases = [ + 'block value containing no special characters' => [ + 'core/heading', + [ + 'x' => 'hi', + 'level' => 2, + ], + ' ', + ], + 'block value containing named character reference '' => [ + 'core/heading', + [ + 'x' => ''', + 'level' => 2, + ], + ' ', + ], + 'block value containing lone apostrophe \' (from ENT_HTML5)' => [ + 'core/heading', + [ + 'x' => '\'', + 'level' => 2, + ], + ' ', + ], + 'block value containing lone quote " (from ENT_COMPAT)' => [ + 'core/heading', + [ + 'x' => '"', + 'level' => 2, + ], + ' ', + ], + 'block value containing named character reference "' => [ + 'core/heading', + [ + 'x' => '"', + 'level' => 2, + ], + ' ', + ], + 'block value containing lone ampersand &' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'block value containing named character reference &' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'block value containing hexadecimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'block value containing leading zero hexadecimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'block value containing decimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'block value containing leading zero decimal numeric character reference & (should not be converted to &)' => [ + 'core/heading', + [ + 'x' => '&', + 'level' => 2, + ], + ' ', + ], + 'html_entities' => [ + 'core/heading', + [ + 'content' => '<script>alert('xss')</script>', + 'level' => 2, + ], + '
Quote with "quotes" & ampersands
', + ], + '', + ], + ]; + $test_cases_with_or_without_tag_processor = []; + foreach ( $test_cases as $name => $case ) { + $test_cases_with_or_without_tag_processor[ "$name with WP_HTML_Tag_Processor" ] = array_merge( $case, [ true ] ); + $test_cases_with_or_without_tag_processor[ "$name with DomDocument" ] = array_merge( $case, [ false ] ); + } + return $test_cases_with_or_without_tag_processor; + } + + /** + * Tests that block attributes can be round-tripped correctly + * + * @dataProvider block_roundtrip_data_provider + * + * @param string $expected_block_name The expected block name + * @param array $expected_attributes The expected block attributes + * @param string $markup The block markup to test + * @param bool $use_tag_processor Whether to use the tag processor + * @return void + */ + public function test_block_attributes_roundtrip( $expected_block_name, $expected_attributes, $markup, $use_tag_processor ) { + $block = $this->core_render_block_from_markup( $markup ); + $tag_processor_function = $use_tag_processor ? '__return_true' : '__return_false'; + add_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function ); + try { + $enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] ); + } finally { + remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function ); + } + + // Parse the enhanced block using DOMDocument to extract data-wp-block and data-wp-block-name + $doc = new \DOMDocument(); + $success = $doc->loadHTML( $enhanced_block, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); + + $this->assertTrue( $success, 'DOMDocument should successfully parse the enhanced block HTML' ); + + // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + $root_element = $doc->documentElement; + $this->assertNotNull( $root_element, 'Should have a root element' ); + + $block_name_attr = $root_element->getAttribute( 'data-wp-block-name' ); + $block_data_attr = $root_element->getAttribute( 'data-wp-block' ); + + $this->assertNotEmpty( $block_name_attr, 'data-wp-block-name attribute should be present' ); + $this->assertNotEmpty( $block_data_attr, 'data-wp-block attribute should be present' ); + + // Parse JSON - DOMDocument should have already handled HTML entity decoding + $parsed_attributes = json_decode( $block_data_attr, true ); + + $this->assertIsArray( $parsed_attributes, 'Block data should decode to valid JSON array' ); + $this->assertEquals( $expected_block_name, $block_name_attr, 'Block name should match expected' ); + $this->assertEquals( $expected_attributes, $parsed_attributes, 'Block attributes should match expected (encoded: ' . $enhanced_block . ')' ); + } + /** * Tests block's rendering Synced Patterns which use another post to store the patterns content * - Run separate to hook the Parser filter on all render_block processing, required for nested blocksQuote with "quotes" & ampersands
Author "Name" & Co.