Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions src/wp-includes/comment.php
Original file line number Diff line number Diff line change
Expand Up @@ -4271,3 +4271,81 @@ function wp_create_initial_comment_meta() {
)
);
}

/**
* Strips inline note markers from rendered block output.
*
* Inline notes - notes anchored to a text selection within a block rather than
* the whole block - are anchored in raw block content with
* `<mark class="wp-note" data-id="N">...</mark>` so the marker survives edits,
* but the public HTML should not expose note metadata. This filter unwraps the
* marker entirely - dropping the `<mark>` open tag and its matching closer while
* keeping the marked text - so nothing leaks to the front end. The raw
* `post_content` (and the REST `raw` view, revisions, exports) keeps the marker
* so the editor can re-attach it on reload.
*
* Only note markers are unwrapped: {@see WP_HTML_Tag_Processor::has_class()}
* matches the `wp-note` class by exact token, so a `<mark>` a user or plugin
* added (e.g. a `core/text-color` highlight, or an unrelated `wp-note-foo`
* class) is never flagged and survives byte-for-byte with all of its attributes
* intact. A naive regex would be wrong here: a `\bwp-note\b` word boundary also
* matches `wp-note-foo`, which is why the class check goes through the HTML API
* instead.
*
* The HTML API has no public token-removal method yet, so an anonymous
* {@see WP_HTML_Tag_Processor} subclass unwraps each note `<mark>` and its
* matching closer directly on the parsed token stream. Walking tokens - rather
* than matching `<mark>` with a regex - means a `</mark>`-looking sequence inside
* a comment or attribute value can never be mistaken for a real tag, and a
* nesting stack keeps each note opener paired with its own closer so overlapping
* notes and any user highlight `<mark>` left intact still resolve correctly.
*
* @since 7.1.0
*
* @param string $block_content Rendered block HTML.
* @return string Block HTML with `wp-note` markers unwrapped.
*/
function wp_strip_inline_note_markers( $block_content ) {
// Fast path: without the literal substring `wp-note` there is nothing to unwrap, so skip parsing.
if ( ! str_contains( $block_content, 'wp-note' ) ) {
return $block_content;
}

/*
* Anonymous subclass exposing token removal, which WP_HTML_Tag_Processor
* does not provide publicly yet. Removing the current token via its bookmark
* span unwraps the `<mark>` (opener or closer) while keeping the text it
* wraps.
*/
$processor = new class( $block_content ) extends WP_HTML_Tag_Processor {
/**
* Removes the current token, keeping any text it wraps.
*
* Bookmarks the current token under the name `here`, then appends a
* replacement of that bookmark's span (start, length) with an empty
* string to the processor's `lexical_updates` list. The same bookmark
* name is reused on every call.
*
* NOTE(review): the return value of set_bookmark() is not checked;
* this relies on the caller only invoking the method on a matched tag.
* The appended replacement is presumably applied when the updated HTML
* is requested - confirm against WP_HTML_Tag_Processor.
*
* @return void
*/
public function remove_token() {
// Always called after next_tag() returned true, so the bookmark is set.
$this->set_bookmark( 'here' );
$span = $this->bookmarks['here'];

$this->lexical_updates[] = new WP_HTML_Text_Replacement( $span->start, $span->length, '' );
}
};

/*
* Walk every `<mark>`, tracking note nesting on a stack so each note opener
* pairs with its own closer, and unwrap only the note markers.
*/
$mark_stack = array();
$query = array(
'tag_name' => 'MARK',
'tag_closers' => 'visit',
);
while ( $processor->next_tag( $query ) ) {
if ( $processor->is_tag_closer() ) {
/*
* A closer takes the flag recorded for the most recent unclosed opener.
* A stray closer pops an empty stack, which yields null and is therefore
* left in place by the strict check below.
*/
$is_note = array_pop( $mark_stack );
} else {
// Record every opener, note or not, so that closers of non-note marks stay paired too.
$is_note = $processor->has_class( 'wp-note' );
$mark_stack[] = $is_note;
}

// Strict comparison: only an explicit `true` (never null or false) removes the token.
if ( true === $is_note ) {
$processor->remove_token();
}
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this code assumes well-formed markup, which is fair for this task. the HTML Processor would guarantee that assumption, but would occasionally abort in certain cases of ill-formed markup.

everything else looks fine, though WPCS is going to complain about making multi-line comments without /* and */ — it prefers polluting diffs and making a big deal out of minor edits.


return $processor->get_updated_html();
}
3 changes: 3 additions & 0 deletions src/wp-includes/default-filters.php
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,9 @@
// Fluid typography.
add_filter( 'render_block', 'wp_render_typography_support', 10, 2 );

// Strip inline note markers from rendered block output.
add_filter( 'render_block', 'wp_strip_inline_note_markers' );

// User preferences.
add_action( 'init', 'wp_register_persisted_preferences_meta' );

Expand Down
121 changes: 121 additions & 0 deletions tests/phpunit/tests/comment/stripInlineNoteMarkers.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
<?php

/**
* Tests that inline note markers are unwrapped in rendered block output via the
* render_block filter, while raw post content is left untouched.
*
* The `<mark class="wp-note">` wrapper is removed entirely - both the open tag
* and its matching closer - so no note marker or metadata reaches the public
* HTML, while the marked text (and any nested formatting) is preserved.
*
* @group comment
* @group notes
*
* @covers ::wp_strip_inline_note_markers
*/
class Tests_Comment_StripInlineNoteMarkers extends WP_UnitTestCase {

// TODO: add an `@ticket` annotation to every test below once the ticket number is confirmed.

/**
* A single note marker is unwrapped: the `<mark>` opener and its closer are
* removed and the marked text stays in place.
*/
public function test_strip_unwraps_marker_from_mark() {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add @ticket annotation for all new tests?

$html = '<p>Hello <mark class="wp-note" data-id="7">marked</mark> world</p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>Hello marked world</p>', $stripped );
}

/**
* Every note marker in the same block is unwrapped, not only the first.
*/
public function test_strip_handles_multiple_markers_in_one_block() {
$html = '<p><mark class="wp-note" data-id="1">a</mark> and <mark class="wp-note" data-id="2">b</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>a and b</p>', $stripped );
}

/**
* Content that contains no note marker is returned unchanged.
*/
public function test_strip_passes_through_block_content_without_markers() {
$html = '<p>Plain text with no notes here.</p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( $html, $stripped );
}

/**
* A note marker that carries additional classes is still unwrapped.
*
* NOTE(review): the method name says the other classes are kept, but the
* assertion expects the whole wrapper - companion classes included - to be
* removed. Consider renaming the test to match what it asserts.
*/
public function test_strip_keeps_other_classes_when_removing_wp_note() {
// The whole wrapper is removed, so any companion classes go with it.
$html = '<p><mark class="custom wp-note other" data-id="3">x</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>x</p>', $stripped );
}

/**
* A `<mark>` without the `wp-note` class is returned unchanged.
*/
public function test_strip_leaves_unrelated_marks_untouched() {
/*
* A user highlight (`core/text-color`) serializes as a plain `<mark>` and
* must survive untouched.
*/
$html = '<p><mark style="background-color:#ff0">keep me</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( $html, $stripped );
}

/**
* A class that merely starts with `wp-note` does not count as a note marker.
*/
public function test_strip_does_not_match_partial_class_names() {
/*
* `wp-note-foo` is a different class and must not be treated as a marker;
* a regex word boundary would incorrectly match it.
*/
$html = '<p><mark class="wp-note-foo">keep me</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( $html, $stripped );
}

/**
* A non-note `<mark>` next to a note marker keeps all of its attributes.
*/
public function test_strip_preserves_user_mark_attributes_next_to_note() {
/*
* A user/plugin `<mark>` with several attributes sitting beside a note
* marker must be returned byte-for-byte; only the `wp-note` wrapper goes.
*/
$html = '<p><mark class="highlight" style="background-color:#ff0" data-id="99" title="kept">user</mark> and <mark class="wp-note" data-id="4">noted</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p><mark class="highlight" style="background-color:#ff0" data-id="99" title="kept">user</mark> and noted</p>', $stripped );
}

/**
* Inline markup nested inside a note marker survives the unwrap.
*/
public function test_strip_preserves_nested_formatting() {
/*
* A note wrapping already-formatted text (e.g. coloured text) serializes
* with nested inline elements. The wrapper is removed while the inner
* markup is preserved intact.
*/
$html = '<p><mark class="wp-note" data-id="1">a <span style="color:red">red</span> b</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>a <span style="color:red">red</span> b</p>', $stripped );
}

/**
* A non-note `<mark>` nested inside a note marker is kept, opener and closer.
*/
public function test_strip_unwraps_note_but_keeps_inner_highlight_mark() {
/*
* A note wrapping a user highlight nests `<mark>` inside `<mark>`. Only the
* note wrapper is removed; the inner highlight `<mark>` is preserved, and
* the closer pairing must not unbalance.
*/
$html = '<p><mark class="wp-note" data-id="1">a <mark style="background-color:#ff0">hi</mark> b</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>a <mark style="background-color:#ff0">hi</mark> b</p>', $stripped );
}

/**
* Note markers nested inside one another are all unwrapped.
*/
public function test_strip_handles_overlapping_nested_note_markers() {
/*
* Two notes anchored on overlapping text serialize as nested `<mark>`s.
* Both wrappers are removed and the text survives.
*/
$html = '<p><mark class="wp-note" data-id="1">a<mark class="wp-note" data-id="2">b</mark>c</mark></p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>abc</p>', $stripped );
}

/**
* A `</mark>` sequence inside an HTML comment is not treated as a closing tag.
*/
public function test_strip_ignores_mark_like_text_inside_a_comment() {
/*
* A `</mark>` sequence inside an HTML comment is text, not a tag. Walking
* the parsed token stream ignores it; a raw regex over the string would
* mistake it for the note's closer, unbalance the pairing, and corrupt
* both the comment and the real wrapper.
*/
$html = '<p><mark class="wp-note" data-id="1">a<!-- </mark> -->b</mark>tail</p>';
$stripped = wp_strip_inline_note_markers( $html );

$this->assertSame( '<p>a<!-- </mark> -->btail</p>', $stripped );
}

/**
* The strip callback is hooked onto the `render_block` filter.
*/
public function test_strip_filter_is_registered_on_render_block() {
/*
* Guards against future hook rewiring that would silently leave
* inline-note markers in rendered output.
*/
$this->assertNotFalse(
has_filter( 'render_block', 'wp_strip_inline_note_markers' )
);
}
}
Loading